R Markdown

#install.packages('TDAmapper')
library(TDAmapper)
library(cluster)
library(vip)
## 
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
## 
##     vi
#install.packages('kernlab')
library(kernlab)
#install.packages('class')
library(class)
#install.packages('nnet')
library(nnet)
#install.packages('randomForest')
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
#install.packages('e1071')
library(e1071)                                                  
#install.packages("BayesFactor")
library(BayesFactor)
## Loading required package: coda
## 
## Attaching package: 'coda'
## The following object is masked from 'package:kernlab':
## 
##     nvar
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.5. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************
library(BayesPPD)
library(bayestestR)
#install.packages('igraph')
library('igraph')
## Warning: package 'igraph' was built under R version 4.3.3
## 
## Attaching package: 'igraph'
## The following object is masked from 'package:BayesFactor':
## 
##     compare
## The following object is masked from 'package:class':
## 
##     knn
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
#install.packages('locfit')
library(locfit)
## locfit 1.5-9.8    2023-06-11
#install.packages('ggplot2')
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
## 
##     margin
## The following object is masked from 'package:kernlab':
## 
##     alpha
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
## 
##     as_data_frame, groups, union
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#install.packages('networkD3')
library(networkD3)
library(rstanarm)
## Loading required package: Rcpp
## This is rstanarm version 2.26.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
##   options(mc.cores = parallel::detectCores())
library(see)
#install.packages('tidyverse')
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%()       masks igraph::%--%()
## ✖ ggplot2::alpha()        masks kernlab::alpha()
## ✖ tibble::as_data_frame() masks dplyr::as_data_frame(), igraph::as_data_frame()
## ✖ dplyr::combine()        masks randomForest::combine()
## ✖ purrr::compose()        masks igraph::compose()
## ✖ purrr::cross()          masks kernlab::cross()
## ✖ tidyr::crossing()       masks igraph::crossing()
## ✖ tidyr::expand()         masks Matrix::expand()
## ✖ dplyr::filter()         masks stats::filter()
## ✖ dplyr::lag()            masks stats::lag()
## ✖ ggplot2::margin()       masks randomForest::margin()
## ✖ purrr::none()           masks locfit::none()
## ✖ tidyr::pack()           masks Matrix::pack()
## ✖ purrr::simplify()       masks igraph::simplify()
## ✖ tidyr::unpack()         masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages('caret')
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
## 
## The following objects are masked from 'package:rstanarm':
## 
##     compare_models, R2
#install.packages('ISLR')
library(ISLR)
#install.packages('MCMCpack')
library(MCMCpack)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2025 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##
#install.packages("caret")
library(caret)
library(TDA)
## 
## Attaching package: 'TDA'
## 
## The following object is masked from 'package:cluster':
## 
##     silhouette
library(TDAstats)
library(ks)
## 
## Attaching package: 'ks'
## 
## The following object is masked from 'package:TDA':
## 
##     kde
## 
## The following object is masked from 'package:MCMCpack':
## 
##     vech
## 
## The following object is masked from 'package:igraph':
## 
##     compare
## 
## The following object is masked from 'package:BayesFactor':
## 
##     compare
#install.packages('MLmetrics')
library(MLmetrics)
## 
## Attaching package: 'MLmetrics'
## 
## The following objects are masked from 'package:caret':
## 
##     MAE, RMSE
## 
## The following object is masked from 'package:base':
## 
##     Recall
#install.packages('googledrive')
library(googledrive)
#install.packages('stringr')
library(stringr)
#install.packages('ks')
library(ks)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
#import adult dataset from UCI repository stored on my desktop

#Adult **
# Read the raw file; it has no header row, so columns are named V1..V15.
# NOTE(review): the str() output shows character values with a leading space
# (e.g. " State-gov"); consider strip.white = TRUE if that is not intended.
adult <- read.csv("~/Desktop/NCU/DissertationDatasets/Adult/adult.data", header=FALSE)
# str() prints its report and returns NULL, so wrapping it in head() only
# printed an extra "NULL"; call it directly.
str(adult)
## 'data.frame':    32561 obs. of  15 variables:
##  $ V1 : int  39 50 38 53 28 37 49 52 31 42 ...
##  $ V2 : chr  " State-gov" " Self-emp-not-inc" " Private" " Private" ...
##  $ V3 : int  77516 83311 215646 234721 338409 284582 160187 209642 45781 159449 ...
##  $ V4 : chr  " Bachelors" " Bachelors" " HS-grad" " 11th" ...
##  $ V5 : int  13 13 9 7 13 14 5 9 14 13 ...
##  $ V6 : chr  " Never-married" " Married-civ-spouse" " Divorced" " Married-civ-spouse" ...
##  $ V7 : chr  " Adm-clerical" " Exec-managerial" " Handlers-cleaners" " Handlers-cleaners" ...
##  $ V8 : chr  " Not-in-family" " Husband" " Not-in-family" " Husband" ...
##  $ V9 : chr  " White" " White" " White" " Black" ...
##  $ V10: chr  " Male" " Male" " Male" " Male" ...
##  $ V11: int  2174 0 0 0 0 0 0 0 14084 5178 ...
##  $ V12: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ V13: int  40 13 40 40 40 40 16 45 50 40 ...
##  $ V14: chr  " United-States" " United-States" " United-States" " United-States" ...
##  $ V15: chr  " <=50K" " <=50K" " <=50K" " <=50K" ...
## NULL
  summary(adult)
##        V1             V2                  V3               V4           
##  Min.   :17.00   Length:32561       Min.   :  12285   Length:32561      
##  1st Qu.:28.00   Class :character   1st Qu.: 117827   Class :character  
##  Median :37.00   Mode  :character   Median : 178356   Mode  :character  
##  Mean   :38.58                      Mean   : 189778                     
##  3rd Qu.:48.00                      3rd Qu.: 237051                     
##  Max.   :90.00                      Max.   :1484705                     
##        V5             V6                 V7                 V8           
##  Min.   : 1.00   Length:32561       Length:32561       Length:32561      
##  1st Qu.: 9.00   Class :character   Class :character   Class :character  
##  Median :10.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :10.08                                                           
##  3rd Qu.:12.00                                                           
##  Max.   :16.00                                                           
##       V9                V10                 V11             V12        
##  Length:32561       Length:32561       Min.   :    0   Min.   :   0.0  
##  Class :character   Class :character   1st Qu.:    0   1st Qu.:   0.0  
##  Mode  :character   Mode  :character   Median :    0   Median :   0.0  
##                                        Mean   : 1078   Mean   :  87.3  
##                                        3rd Qu.:    0   3rd Qu.:   0.0  
##                                        Max.   :99999   Max.   :4356.0  
##       V13            V14                V15           
##  Min.   : 1.00   Length:32561       Length:32561      
##  1st Qu.:40.00   Class :character   Class :character  
##  Median :40.00   Mode  :character   Mode  :character  
##  Mean   :40.44                                        
##  3rd Qu.:45.00                                        
##  Max.   :99.00
  # Pairs plot of the six integer columns (V1, V3, V5, V11, V12, V13) together
  # with the character column V15; column types are listed in the str() output.
  ggpairs(adult[,c(1,3,5,11,12,13,15)])
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

##Add Bayesian tests functions

# Bayesian sign test (proportion-based version).
#
# A pseudo-observation at 0 is prepended to `diffVector`, then the share of
# values lying to the left of, inside, and to the right of the rope is
# reported.
# NOTE(review): no Dirichlet sampling is performed here; the returned values
# are plain proportions of the augmented vector.
#
# Args:
#   diffVector: numeric vector of differences.
#   rope_min:   lower bound of the rope.
#   rope_max:   upper bound of the rope (must not be smaller than rope_min).
#
# Returns:
#   A list with elements probLeft, probRope and probRight. For input without
#   missing values the three entries sum to 1.
BayesianSignTest <- function(diffVector, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }

  # add the fake first observation in 0
  diffVector <- c(0, diffVector)

  probLeft <- mean(diffVector < rope_min)
  probRight <- mean(diffVector > rope_max)
  # The rope is a closed interval: a value sitting exactly on a bound counts
  # as inside the rope, so every value lands in exactly one region.
  probRope <- mean(diffVector >= rope_min & diffVector <= rope_max)

  list("probLeft" = probLeft, "probRope" = probRope, "probRight" = probRight)
}


# Bayesian signed-rank test.
#
# A pseudo-observation at 0 (Dirichlet parameter 0.5) is prepended to
# `diffVector`; every real observation gets Dirichlet parameter 1. For each of
# `samples` Dirichlet draws w, the products w[i] * w[j] are accumulated over
# all ordered pairs (i, j) into one of three regions, chosen by d[i] + d[j]:
#   right: d[i] + d[j] >  2 * rope_max
#   rope : 2 * rope_min < d[i] + d[j] <= 2 * rope_max
#   left : d[i] + d[j] <= 2 * rope_min
# The region with the largest accumulated mass wins the draw; tied regions
# share the win equally.
#
# Args:
#   diffVector: numeric vector of differences (no missing values).
#   rope_min:   lower bound of the rope.
#   rope_max:   upper bound of the rope (must not be smaller than rope_min).
#   samples:    number of Dirichlet draws (default 30000, the value that was
#               previously hard-coded).
#
# Returns:
#   A list with elements winLeft, winRope and winRight: the fraction of draws
#   won by each region.
BayesianSignedRank <- function(diffVector, rope_min, rope_max,
                               samples = 30000) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }
  if (anyNA(diffVector)) {
    stop("diffVector must not contain missing values")
  }

  # build the vector 0.5 1 1 ....... 1
  weights <- c(0.5, rep(1, length(diffVector)))

  # add the fake first observation in 0
  diffVector <- c(0, diffVector)

  # One Dirichlet draw per row. Called through the namespace so the function
  # does not attach MCMCpack as a side effect.
  sampledWeights <- MCMCpack::rdirichlet(samples, weights)

  # The region of each pair (i, j) does not depend on the draw, so classify
  # all pairs once instead of once per sample.
  pairSums <- outer(diffVector, diffVector, "+")
  inRight <- pairSums > 2 * rope_max
  inRope <- !inRight & pairSums > 2 * rope_min
  inLeft <- !inRight & !inRope

  # For every draw w (row), returns sum over (i, j) in `mask` of w[i] * w[j],
  # i.e. the quadratic form w' M w evaluated for all rows at once.
  regionMass <- function(mask) {
    rowSums((sampledWeights %*% (mask * 1)) * sampledWeights)
  }
  massRight <- regionMass(inRight)
  massRope <- regionMass(inRope)
  massLeft <- regionMass(inLeft)

  # Award each draw to the region(s) holding the maximal mass.
  maxWins <- pmax(massRight, massRope, massLeft)
  rightIsMax <- massRight == maxWins
  ropeIsMax <- massRope == maxWins
  leftIsMax <- massLeft == maxWins
  winners <- rightIsMax + ropeIsMax + leftIsMax

  list("winLeft" = mean(leftIsMax / winners),
       "winRope" = mean(ropeIsMax / winners),
       "winRight" = mean(rightIsMax / winners))
}


# Bayesian correlated t-test.
#
# Args:
#   diff_a_b: vector of differences between the two classifiers, on each fold
#             of cross-validation. If you have done 10 runs of 10-folds
#             cross-validation, you have 100 results for each classifier.
#             You should have run cross-validation on the same folds for the
#             two classifiers; diff_a_b is then the difference fold-by-fold.
#             At least two differences are required.
#   rho:      correlation of the cross-validation results:
#             1/(number of folds). Must lie in [0, 1).
#   rope_min: lower bound of the rope.
#   rope_max: upper bound of the rope (must not be smaller than rope_min).
#
# Returns:
#   A list with elements left, rope and right: the Student-t probability mass
#   below rope_min, between the rope bounds, and above rope_max.
correlatedBayesianTtest <- function(diff_a_b, rho, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }
  # rho == 1 divides by zero and rho > 1 puts a negative value under the
  # square root, so reject both up front.
  if (rho < 0 || rho >= 1) {
    stop("rho should be in [0, 1)")
  }
  n <- length(diff_a_b)
  if (n < 2) {
    stop("diff_a_b should contain at least two differences")
  }

  delta <- mean(diff_a_b)
  df <- n - 1
  stdX <- sd(diff_a_b)

  # With zero spread the scale below is 0; when the mean also sits on a rope
  # bound the t statistic would be 0/0 (NaN). Put all mass in the region that
  # contains the mean instead (rope bounds inclusive).
  if (isTRUE(stdX == 0)) {
    p.left <- as.numeric(delta < rope_min)
    p.right <- as.numeric(delta > rope_max)
    return(list("left" = p.left, "rope" = 1 - p.left - p.right,
                "right" = p.right))
  }

  # Scale inflated by the correlation term rho / (1 - rho).
  sp <- stdX * sqrt(1 / n + rho / (1 - rho))
  p.left <- pt((rope_min - delta) / sp, df)
  p.rope <- pt((rope_max - delta) / sp, df) - p.left
  list("left" = p.left, "rope" = p.rope, "right" = 1 - p.left - p.rope)
}
set.seed(16974)
###Prepare datasets for One hot encoding if necessary and Persistent homology of each dataset.
##One hot encoding for adult dataset
library(caret)

# Build the encoder from the formula " ~ .", i.e. from every column of `adult`.
# NOTE(review): that formula also covers the V15 label column, so the label
# appears to be dummy-encoded along with the predictors - confirm this is
# intended.
dummy.adult <- dummyVars(" ~ .", data = adult)

# Apply the encoder to `adult` and store the result as a data frame.
adult.one_hot_df <- data.frame(predict(dummy.adult, newdata = adult))

# Show the structure of the encoded data frame. str() prints its report and
# returns NULL, so wrapping it in head() only printed an extra "NULL".
str(adult.one_hot_df)
## 'data.frame':    32561 obs. of  110 variables:
##  $ V1                            : num  39 50 38 53 28 37 49 52 31 42 ...
##  $ V2..                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Federal.gov                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Local.gov                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Never.worked               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Private                    : num  0 0 1 1 1 1 1 0 1 1 ...
##  $ V2.Self.emp.inc               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Self.emp.not.inc           : num  0 1 0 0 0 0 0 1 0 0 ...
##  $ V2.State.gov                  : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Without.pay                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V3                            : num  77516 83311 215646 234721 338409 ...
##  $ V4.10th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.11th                       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ V4.12th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.1st.4th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.5th.6th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.7th.8th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.9th                        : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V4.Assoc.acdm                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Assoc.voc                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Bachelors                  : num  1 1 0 0 1 0 0 0 0 1 ...
##  $ V4.Doctorate                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.HS.grad                    : num  0 0 1 0 0 0 0 1 0 0 ...
##  $ V4.Masters                    : num  0 0 0 0 0 1 0 0 1 0 ...
##  $ V4.Preschool                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Prof.school                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Some.college               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V5                            : num  13 13 9 7 13 14 5 9 14 13 ...
##  $ V6.Divorced                   : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ V6.Married.AF.spouse          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Married.civ.spouse         : num  0 1 0 1 1 1 0 1 0 1 ...
##  $ V6.Married.spouse.absent      : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V6.Never.married              : num  1 0 0 0 0 0 0 0 1 0 ...
##  $ V6.Separated                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Widowed                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7..                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Adm.clerical               : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Armed.Forces               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Craft.repair               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Exec.managerial            : num  0 1 0 0 0 1 0 1 0 1 ...
##  $ V7.Farming.fishing            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Handlers.cleaners          : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ V7.Machine.op.inspct          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Other.service              : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V7.Priv.house.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Prof.specialty             : num  0 0 0 0 1 0 0 0 1 0 ...
##  $ V7.Protective.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Sales                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Tech.support               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Transport.moving           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Husband                    : num  0 1 0 1 0 0 0 1 0 1 ...
##  $ V8.Not.in.family              : num  1 0 1 0 0 0 1 0 1 0 ...
##  $ V8.Other.relative             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Own.child                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Unmarried                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Wife                       : num  0 0 0 0 1 1 0 0 0 0 ...
##  $ V9.Amer.Indian.Eskimo         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Asian.Pac.Islander         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Black                      : num  0 0 0 1 1 0 1 0 0 0 ...
##  $ V9.Other                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.White                      : num  1 1 1 0 0 1 0 1 1 1 ...
##  $ V10.Female                    : num  0 0 0 0 1 1 1 0 1 0 ...
##  $ V10.Male                      : num  1 1 1 1 0 0 0 1 0 1 ...
##  $ V11                           : num  2174 0 0 0 0 ...
##  $ V12                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V13                           : num  40 13 40 40 40 40 16 45 50 40 ...
##  $ V14..                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cambodia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Canada                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.China                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Columbia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cuba                      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ V14.Dominican.Republic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ecuador                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.El.Salvador               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.England                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.France                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Germany                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Greece                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Guatemala                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Haiti                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Holand.Netherlands        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Honduras                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hong                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hungary                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.India                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Iran                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ireland                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Italy                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Jamaica                   : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V14.Japan                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Laos                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Mexico                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Nicaragua                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Outlying.US.Guam.USVI.etc.: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Peru                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Philippines               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Poland                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Portugal                  : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
## NULL
# Raw V15 column (15th column of `adult`) as a plain vector.
adult_df1 <- adult[[15]]
# One-hot data with that vector appended as an extra column "adult_df1".
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1 = adult_df1)
# Keep positions 1, 11, 28 and 64-66, which the str() listing of
# adult.one_hot_df shows as V1, V3, V5, V11, V12 and V13.
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64:66)]

##Persistent homology of adult dataset

# Draw 1000 rows at random (without replacement) from the one-hot encoded
# adult data, to check whether a barcode and persistence diagram can be
# computed on a dataset of that size.
adult.one_hot_1000_df <- adult.one_hot_df[
  sample(nrow(adult.one_hot_df), size = 1000, replace = FALSE),
]
# str() prints its report and returns NULL, so wrapping it in head() only
# printed an extra "NULL"; call it directly.
str(adult.one_hot_1000_df)
## 'data.frame':    1000 obs. of  110 variables:
##  $ V1                            : num  33 25 39 21 32 26 20 58 24 63 ...
##  $ V2..                          : num  0 0 0 0 0 0 1 1 0 0 ...
##  $ V2.Federal.gov                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Local.gov                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Never.worked               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Private                    : num  1 1 1 1 1 1 0 0 1 0 ...
##  $ V2.Self.emp.inc               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Self.emp.not.inc           : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ V2.State.gov                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Without.pay                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V3                            : num  176992 105693 234901 198050 134886 ...
##  $ V4.10th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.11th                       : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ V4.12th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.1st.4th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.5th.6th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.7th.8th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.9th                        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Assoc.acdm                 : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ V4.Assoc.voc                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Bachelors                  : num  0 1 0 0 0 1 0 0 0 0 ...
##  $ V4.Doctorate                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.HS.grad                    : num  0 0 0 0 1 0 0 0 1 0 ...
##  $ V4.Masters                    : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Preschool                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Prof.school                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Some.college               : num  0 0 0 0 0 0 1 0 0 1 ...
##  $ V5                            : num  14 13 12 12 9 13 10 7 9 10 ...
##  $ V6.Divorced                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Married.AF.spouse          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Married.civ.spouse         : num  1 0 0 0 1 0 0 1 0 1 ...
##  $ V6.Married.spouse.absent      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Never.married              : num  0 1 0 1 0 1 1 0 1 0 ...
##  $ V6.Separated                  : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ V6.Widowed                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7..                          : num  0 0 0 0 0 0 1 1 0 0 ...
##  $ V7.Adm.clerical               : num  0 0 1 1 1 0 0 0 0 0 ...
##  $ V7.Armed.Forces               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Craft.repair               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Exec.managerial            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Farming.fishing            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Handlers.cleaners          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Machine.op.inspct          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Other.service              : num  0 0 0 0 0 0 0 0 1 1 ...
##  $ V7.Priv.house.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Prof.specialty             : num  1 1 0 0 0 1 0 0 0 0 ...
##  $ V7.Protective.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Sales                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Tech.support               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Transport.moving           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Husband                    : num  1 0 0 0 0 0 0 1 0 1 ...
##  $ V8.Not.in.family              : num  0 1 0 1 0 1 0 0 0 0 ...
##  $ V8.Other.relative             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Own.child                  : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ V8.Unmarried                  : num  0 0 1 0 0 0 1 0 0 0 ...
##  $ V8.Wife                       : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ V9.Amer.Indian.Eskimo         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Asian.Pac.Islander         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Black                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Other                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.White                      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V10.Female                    : num  0 1 0 1 1 1 1 0 1 0 ...
##  $ V10.Male                      : num  1 0 1 0 0 0 0 1 0 1 ...
##  $ V11                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V12                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V13                           : num  40 40 40 25 40 40 20 16 25 48 ...
##  $ V14..                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cambodia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Canada                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.China                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Columbia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cuba                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Dominican.Republic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ecuador                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.El.Salvador               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.England                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.France                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Germany                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Greece                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Guatemala                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Haiti                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Holand.Netherlands        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Honduras                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hong                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hungary                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.India                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Iran                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ireland                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Italy                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Jamaica                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Japan                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Laos                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Mexico                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Nicaragua                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Outlying.US.Guam.USVI.etc.: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Peru                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Philippines               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Poland                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Portugal                  : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
## NULL
summary(adult.one_hot_1000_df)
##        V1             V2..       V2.Federal.gov   V2.Local.gov  
##  Min.   :17.00   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:28.00   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :37.00   Median :0.000   Median :0.000   Median :0.000  
##  Mean   :38.64   Mean   :0.077   Mean   :0.025   Mean   :0.064  
##  3rd Qu.:47.00   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :90.00   Max.   :1.000   Max.   :1.000   Max.   :1.000  
##  V2.Never.worked   V2.Private    V2.Self.emp.inc V2.Self.emp.not.inc
##  Min.   :0       Min.   :0.000   Min.   :0.000   Min.   :0.000      
##  1st Qu.:0       1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000      
##  Median :0       Median :1.000   Median :0.000   Median :0.000      
##  Mean   :0       Mean   :0.679   Mean   :0.037   Mean   :0.079      
##  3rd Qu.:0       3rd Qu.:1.000   3rd Qu.:0.000   3rd Qu.:0.000      
##  Max.   :0       Max.   :1.000   Max.   :1.000   Max.   :1.000      
##   V2.State.gov   V2.Without.pay       V3            V4.10th     
##  Min.   :0.000   Min.   :0      Min.   : 19302   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0      1st Qu.:123797   1st Qu.:0.000  
##  Median :0.000   Median :0      Median :181982   Median :0.000  
##  Mean   :0.039   Mean   :0      Mean   :195583   Mean   :0.041  
##  3rd Qu.:0.000   3rd Qu.:0      3rd Qu.:242529   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :0      Max.   :721161   Max.   :1.000  
##     V4.11th         V4.12th        V4.1st.4th      V4.5th.6th   
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.032   Mean   :0.015   Mean   :0.005   Mean   :0.015  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000  
##    V4.7th.8th        V4.9th      V4.Assoc.acdm   V4.Assoc.voc    V4.Bachelors  
##  Min.   :0.000   Min.   :0.000   Min.   :0.00   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.00   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0.00   Median :0.000   Median :0.000  
##  Mean   :0.015   Mean   :0.018   Mean   :0.04   Mean   :0.052   Mean   :0.155  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.00   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.00   Max.   :1.000   Max.   :1.000  
##   V4.Doctorate     V4.HS.grad      V4.Masters     V4.Preschool  
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.014   Mean   :0.327   Mean   :0.053   Mean   :0.002  
##  3rd Qu.:0.000   3rd Qu.:1.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000  
##  V4.Prof.school  V4.Some.college       V5      V6.Divorced   
##  Min.   :0.000   Min.   :0.000   Min.   : 1   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.: 9   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :10   Median :0.000  
##  Mean   :0.014   Mean   :0.202   Mean   :10   Mean   :0.132  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:12   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :16   Max.   :1.000  
##  V6.Married.AF.spouse V6.Married.civ.spouse V6.Married.spouse.absent
##  Min.   :0            Min.   :0.000         Min.   :0.000           
##  1st Qu.:0            1st Qu.:0.000         1st Qu.:0.000           
##  Median :0            Median :0.000         Median :0.000           
##  Mean   :0            Mean   :0.464         Mean   :0.005           
##  3rd Qu.:0            3rd Qu.:1.000         3rd Qu.:0.000           
##  Max.   :0            Max.   :1.000         Max.   :1.000           
##  V6.Never.married  V6.Separated     V6.Widowed         V7..      
##  Min.   :0.000    Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000    1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000    Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.325    Mean   :0.041   Mean   :0.033   Mean   :0.077  
##  3rd Qu.:1.000    3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000    Max.   :1.000   Max.   :1.000   Max.   :1.000  
##  V7.Adm.clerical V7.Armed.Forces V7.Craft.repair V7.Exec.managerial
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000     
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000     
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000     
##  Mean   :0.117   Mean   :0.001   Mean   :0.129   Mean   :0.124     
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000     
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000     
##  V7.Farming.fishing V7.Handlers.cleaners V7.Machine.op.inspct V7.Other.service
##  Min.   :0.000      Min.   :0.000        Min.   :0.000        Min.   :0.000   
##  1st Qu.:0.000      1st Qu.:0.000        1st Qu.:0.000        1st Qu.:0.000   
##  Median :0.000      Median :0.000        Median :0.000        Median :0.000   
##  Mean   :0.029      Mean   :0.041        Mean   :0.071        Mean   :0.091   
##  3rd Qu.:0.000      3rd Qu.:0.000        3rd Qu.:0.000        3rd Qu.:0.000   
##  Max.   :1.000      Max.   :1.000        Max.   :1.000        Max.   :1.000   
##  V7.Priv.house.serv V7.Prof.specialty V7.Protective.serv    V7.Sales    
##  Min.   :0.000      Min.   :0.000     Min.   :0.000      Min.   :0.000  
##  1st Qu.:0.000      1st Qu.:0.000     1st Qu.:0.000      1st Qu.:0.000  
##  Median :0.000      Median :0.000     Median :0.000      Median :0.000  
##  Mean   :0.003      Mean   :0.119     Mean   :0.018      Mean   :0.102  
##  3rd Qu.:0.000      3rd Qu.:0.000     3rd Qu.:0.000      3rd Qu.:0.000  
##  Max.   :1.000      Max.   :1.000     Max.   :1.000      Max.   :1.000  
##  V7.Tech.support V7.Transport.moving   V8.Husband   V8.Not.in.family
##  Min.   :0.000   Min.   :0.000       Min.   :0.00   Min.   :0.000   
##  1st Qu.:0.000   1st Qu.:0.000       1st Qu.:0.00   1st Qu.:0.000   
##  Median :0.000   Median :0.000       Median :0.00   Median :0.000   
##  Mean   :0.035   Mean   :0.043       Mean   :0.41   Mean   :0.261   
##  3rd Qu.:0.000   3rd Qu.:0.000       3rd Qu.:1.00   3rd Qu.:1.000   
##  Max.   :1.000   Max.   :1.000       Max.   :1.00   Max.   :1.000   
##  V8.Other.relative  V8.Own.child    V8.Unmarried      V8.Wife     
##  Min.   :0.000     Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000     1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000     Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.027     Mean   :0.136   Mean   :0.115   Mean   :0.051  
##  3rd Qu.:0.000     3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000     Max.   :1.000   Max.   :1.000   Max.   :1.000  
##  V9.Amer.Indian.Eskimo V9.Asian.Pac.Islander    V9.Black        V9.Other    
##  Min.   :0.000         Min.   :0.000         Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000         1st Qu.:0.000         1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000         Median :0.000         Median :0.000   Median :0.000  
##  Mean   :0.014         Mean   :0.029         Mean   :0.104   Mean   :0.007  
##  3rd Qu.:0.000         3rd Qu.:0.000         3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000         Max.   :1.000         Max.   :1.000   Max.   :1.000  
##     V9.White       V10.Female       V10.Male          V11         
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :    0.0  
##  1st Qu.:1.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:    0.0  
##  Median :1.000   Median :0.000   Median :1.000   Median :    0.0  
##  Mean   :0.846   Mean   :0.339   Mean   :0.661   Mean   :  868.9  
##  3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:    0.0  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :99999.0  
##       V12               V13           V14..        V14.Cambodia   V14.Canada   
##  Min.   :   0.00   Min.   : 1.0   Min.   :0.000   Min.   :0     Min.   :0.000  
##  1st Qu.:   0.00   1st Qu.:40.0   1st Qu.:0.000   1st Qu.:0     1st Qu.:0.000  
##  Median :   0.00   Median :40.0   Median :0.000   Median :0     Median :0.000  
##  Mean   :  92.56   Mean   :40.5   Mean   :0.024   Mean   :0     Mean   :0.003  
##  3rd Qu.:   0.00   3rd Qu.:45.0   3rd Qu.:0.000   3rd Qu.:0     3rd Qu.:0.000  
##  Max.   :2457.00   Max.   :99.0   Max.   :1.000   Max.   :0     Max.   :1.000  
##    V14.China      V14.Columbia      V14.Cuba     V14.Dominican.Republic
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000         
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000         
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000         
##  Mean   :0.003   Mean   :0.002   Mean   :0.005   Mean   :0.002         
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000         
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000         
##   V14.Ecuador    V14.El.Salvador  V14.England      V14.France   
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0.000   Median :0.000  
##  Mean   :0.001   Mean   :0.003   Mean   :0.003   Mean   :0.001  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000  
##   V14.Germany      V14.Greece    V14.Guatemala   V14.Haiti    
##  Min.   :0.000   Min.   :0.000   Min.   :0     Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0     1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0     Median :0.000  
##  Mean   :0.002   Mean   :0.002   Mean   :0     Mean   :0.002  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0     3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :0     Max.   :1.000  
##  V14.Holand.Netherlands  V14.Honduras    V14.Hong      V14.Hungary   
##  Min.   :0              Min.   :0     Min.   :0.000   Min.   :0.000  
##  1st Qu.:0              1st Qu.:0     1st Qu.:0.000   1st Qu.:0.000  
##  Median :0              Median :0     Median :0.000   Median :0.000  
##  Mean   :0              Mean   :0     Mean   :0.001   Mean   :0.002  
##  3rd Qu.:0              3rd Qu.:0     3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :0              Max.   :0     Max.   :1.000   Max.   :1.000  
##    V14.India        V14.Iran  V14.Ireland   V14.Italy      V14.Jamaica   
##  Min.   :0.000   Min.   :0   Min.   :0    Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0   1st Qu.:0    1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0   Median :0    Median :0.000   Median :0.000  
##  Mean   :0.004   Mean   :0   Mean   :0    Mean   :0.003   Mean   :0.003  
##  3rd Qu.:0.000   3rd Qu.:0   3rd Qu.:0    3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :0   Max.   :0    Max.   :1.000   Max.   :1.000  
##    V14.Japan        V14.Laos   V14.Mexico    V14.Nicaragua
##  Min.   :0.000   Min.   :0   Min.   :0.000   Min.   :0    
##  1st Qu.:0.000   1st Qu.:0   1st Qu.:0.000   1st Qu.:0    
##  Median :0.000   Median :0   Median :0.000   Median :0    
##  Mean   :0.003   Mean   :0   Mean   :0.022   Mean   :0    
##  3rd Qu.:0.000   3rd Qu.:0   3rd Qu.:0.000   3rd Qu.:0    
##  Max.   :1.000   Max.   :0   Max.   :1.000   Max.   :0    
##  V14.Outlying.US.Guam.USVI.etc.    V14.Peru V14.Philippines   V14.Poland   
##  Min.   :0                      Min.   :0   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0                      1st Qu.:0   1st Qu.:0.000   1st Qu.:0.000  
##  Median :0                      Median :0   Median :0.000   Median :0.000  
##  Mean   :0                      Mean   :0   Mean   :0.004   Mean   :0.002  
##  3rd Qu.:0                      3rd Qu.:0   3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :0                      Max.   :0   Max.   :1.000   Max.   :1.000  
##   V14.Portugal   V14.Puerto.Rico  V14.Scotland   V14.South       V14.Taiwan   
##  Min.   :0.000   Min.   :0.000   Min.   :0     Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0     1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000   Median :0     Median :0.000   Median :0.000  
##  Mean   :0.001   Mean   :0.004   Mean   :0     Mean   :0.001   Mean   :0.001  
##  3rd Qu.:0.000   3rd Qu.:0.000   3rd Qu.:0     3rd Qu.:0.000   3rd Qu.:0.000  
##  Max.   :1.000   Max.   :1.000   Max.   :0     Max.   :1.000   Max.   :1.000  
##   V14.Thailand V14.Trinadad.Tobago V14.United.States  V14.Vietnam   
##  Min.   :0     Min.   :0.000       Min.   :0.000     Min.   :0.000  
##  1st Qu.:0     1st Qu.:0.000       1st Qu.:1.000     1st Qu.:0.000  
##  Median :0     Median :0.000       Median :1.000     Median :0.000  
##  Mean   :0     Mean   :0.002       Mean   :0.891     Mean   :0.003  
##  3rd Qu.:0     3rd Qu.:0.000       3rd Qu.:1.000     3rd Qu.:0.000  
##  Max.   :0     Max.   :1.000       Max.   :1.000     Max.   :1.000  
##  V14.Yugoslavia   V15...50K        V15..50K    
##  Min.   :0      Min.   :0.000   Min.   :0.000  
##  1st Qu.:0      1st Qu.:1.000   1st Qu.:0.000  
##  Median :0      Median :1.000   Median :0.000  
##  Mean   :0      Mean   :0.769   Mean   :0.231  
##  3rd Qu.:0      3rd Qu.:1.000   3rd Qu.:0.000  
##  Max.   :0      Max.   :1.000   Max.   :1.000
# Compute persistent homology for adult.one_hot_1000_df and keep the result
# for the two plots below.
# NOTE(review): calculate_homology(), plot_barcode() and plot_persist() do not
# come from a package attached in the visible header (presumably TDAstats) --
# confirm that the library is loaded earlier in the document.
phom.adult.one_hot_1000_df <- calculate_homology(adult.one_hot_1000_df)

# Draw the barcode of the homology result computed above.
plot_barcode(phom.adult.one_hot_1000_df)

# Draw the persistence diagram of the same homology result.
plot_persist(phom.adult.one_hot_1000_df)

#####———————————————MAPPER ALGORITHM————————————————

# Build the working data sets for the Mapper 1-D runs.
# Column 15 of the raw Adult data is appended to the one-hot frame under the
# name adult_df1; the remaining frames are positional column subsets of it.
adult_df1 <- adult[, 15]
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)

# NOTE(review): the positions below are hard-coded; columns 109 and 110 look
# like the one-hot encoded V15 target -- confirm against names(adult.one_hot_df1).
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64, 65, 66, 109)]
adult.one_hot_df3 <- adult.one_hot_df1[, c(1, 11, 28, 62:66)]
adult.one_hot_df4 <- adult.one_hot_df1[, -c(109, 110)]

## Two filter functions: PCA and KDE

# Filter 1: linear PCA over every column of the one-hot data, centred and
# scaled to unit variance. ts_pca_b holds the component scores per row.
# NOTE(review): adult.one_hot_df appears to contain the one-hot V15 target
# columns as well, so the lens may see the label -- confirm this is intended.
b <- prcomp(adult.one_hot_df, center = TRUE, scale. = TRUE)
ts_pca_b <- as.data.frame(predict(b, adult.one_hot_df))

# Filter 2: kernel density estimate over the first four columns of
# adult.one_hot_df3, using an identity bandwidth matrix and evaluated at the
# data points themselves; only the density values are kept.
filter.kde <- kde(
  adult.one_hot_df3[, 1:4],
  H = diag(1, nrow = 4),
  eval.points = adult.one_hot_df3[, 1:4]
)$estimate


##*** Adult PCA Mapper: 5 intervals, 60% overlap, 5 bins

# 1-D Mapper on the one-hot Adult data with the first principal component as
# the filter. The distance matrix is rebuilt by every Mapper run in this
# script; computing dist(adult.one_hot_df) once and reusing it would avoid
# the repeated pairwise-distance work.
m_adult_5.60.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(ts_pca_b$PC1),
  num_intervals = 5,
  percent_overlap = 60,
  num_bins_when_clustering = 5)

# Build and draw the Mapper graph. graph_from_adjacency_matrix() and
# layout_nicely() are the replacements igraph names for graph.adjacency() and
# layout.auto(), both deprecated in igraph 2.0.0.
g_adult_5.60.5 <- graph_from_adjacency_matrix(m_adult_5.60.5$adjacency,
                                              mode = "undirected")
plot(g_adult_5.60.5, layout = layout_nicely(g_adult_5.60.5))

# Inspect the Mapper output. str() prints the structure itself and returns
# NULL, so wrapping it in head() only printed an extra "NULL" line.
str(m_adult_5.60.5$level_of_vertex)
##  int [1:5] 1 2 3 4 5
str(m_adult_5.60.5$vertices_in_level)
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
str(m_adult_5.60.5$points_in_vertex)
## List of 5
##  $ : int [1:6560] 2 8 10 11 12 15 21 26 28 39 ...
##  $ : int [1:13933] 2 8 10 11 12 15 19 20 21 23 ...
##  $ : int [1:15744] 1 2 3 4 5 6 9 11 15 16 ...
##  $ : int [1:19829] 1 3 4 5 6 9 13 14 16 17 ...
##  $ : int [1:16508] 1 3 5 7 13 14 17 18 22 25 ...
# Redraw the PCA 60% Mapper graph with vertices coloured by filter level and
# sized by the number of observations they contain.
my_resolution <- 100
my_palette <- colorRampPalette(c('red', 'green', 'lightblue'))
my_max <- max(m_adult_5.60.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.60.5$level_of_vertex / my_max

# Bin the normalised levels into my_resolution classes and pick one palette
# colour per vertex.
my_colors <- my_palette(my_resolution)[as.numeric(cut(
  my_vector, breaks = my_resolution))]

# graph_from_adjacency_matrix() / layout_nicely() replace the deprecated
# graph.adjacency() / layout.auto() (igraph 2.0.0).
g_adult_5.60.5 <- graph_from_adjacency_matrix(m_adult_5.60.5$adjacency,
                                              mode = "undirected")
# Number of observations in each vertex.
vertex_size <- lengths(m_adult_5.60.5$points_in_vertex)

# Log-scale the sizes so the largest vertex is drawn at size 30.
plot(g_adult_5.60.5, layout = layout_nicely(g_adult_5.60.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

# Observation IDs held by each of the five Mapper vertices. The .nK objects
# keep the one-element-list form; the .nK.vec objects are plain index vectors.
m_adult_5.60.5.n1 <- m_adult_5.60.5$points_in_vertex[1]
m_adult_5.60.5.n2 <- m_adult_5.60.5$points_in_vertex[2]
m_adult_5.60.5.n3 <- m_adult_5.60.5$points_in_vertex[3]
m_adult_5.60.5.n4 <- m_adult_5.60.5$points_in_vertex[4]
m_adult_5.60.5.n5 <- m_adult_5.60.5$points_in_vertex[5]

m_adult_5.60.5.n1.vec <- unlist(m_adult_5.60.5.n1, use.names = FALSE)
m_adult_5.60.5.n2.vec <- unlist(m_adult_5.60.5.n2, use.names = FALSE)
m_adult_5.60.5.n3.vec <- unlist(m_adult_5.60.5.n3, use.names = FALSE)
m_adult_5.60.5.n4.vec <- unlist(m_adult_5.60.5.n4, use.names = FALSE)
m_adult_5.60.5.n5.vec <- unlist(m_adult_5.60.5.n5, use.names = FALSE)

# Use those IDs as row indices into adult.one_hot_df4 to get the
# observations belonging to each vertex.
tda.m_adult_5.60.5.n1.vec <- adult.one_hot_df4[m_adult_5.60.5.n1.vec, ]
tda.m_adult_5.60.5.n2.vec <- adult.one_hot_df4[m_adult_5.60.5.n2.vec, ]
tda.m_adult_5.60.5.n3.vec <- adult.one_hot_df4[m_adult_5.60.5.n3.vec, ]
tda.m_adult_5.60.5.n4.vec <- adult.one_hot_df4[m_adult_5.60.5.n4.vec, ]
tda.m_adult_5.60.5.n5.vec <- adult.one_hot_df4[m_adult_5.60.5.n5.vec, ]


##*** Adult PCA Mapper: 5 intervals, 50% overlap, 5 bins

# 1-D Mapper on the one-hot Adult data with the first principal component as
# the filter.
m_adult_5.50.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(ts_pca_b$PC1),
  num_intervals = 5,
  percent_overlap = 50,
  num_bins_when_clustering = 5)

# Build and draw the Mapper graph. graph_from_adjacency_matrix() and
# layout_nicely() replace graph.adjacency() and layout.auto(), which igraph
# deprecated in 2.0.0.
g_adult_5.50.5 <- graph_from_adjacency_matrix(m_adult_5.50.5$adjacency,
                                              mode = "undirected")
plot(g_adult_5.50.5, layout = layout_nicely(g_adult_5.50.5))

# Inspect the Mapper output. str() prints the structure itself and returns
# NULL, so wrapping it in head() only printed an extra "NULL" line.
str(m_adult_5.50.5$level_of_vertex)
##  int [1:5] 1 2 3 4 5
str(m_adult_5.50.5$vertices_in_level)
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
str(m_adult_5.50.5$points_in_vertex)
## List of 5
##  $ : int [1:4917] 8 10 12 21 26 46 64 69 73 87 ...
##  $ : int [1:12206] 2 8 10 11 12 15 21 24 26 28 ...
##  $ : int [1:13240] 1 2 4 5 6 9 11 15 16 19 ...
##  $ : int [1:16700] 1 3 4 5 6 9 13 14 16 17 ...
##  $ : int [1:14404] 3 7 13 14 17 18 22 25 27 32 ...
# Redraw the PCA 50% Mapper graph with vertices coloured by filter level and
# sized by the number of observations they contain.
my_resolution <- 100
my_palette <- colorRampPalette(c('red', 'green', 'lightblue'))
my_max <- max(m_adult_5.50.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.50.5$level_of_vertex / my_max

# Bin the normalised levels into my_resolution classes and pick one palette
# colour per vertex.
my_colors <- my_palette(my_resolution)[as.numeric(cut(
  my_vector, breaks = my_resolution))]

# graph_from_adjacency_matrix() / layout_nicely() replace the deprecated
# graph.adjacency() / layout.auto() (igraph 2.0.0).
g_adult_5.50.5 <- graph_from_adjacency_matrix(m_adult_5.50.5$adjacency,
                                              mode = "undirected")
# Number of observations in each vertex.
vertex_size <- lengths(m_adult_5.50.5$points_in_vertex)

# Log-scale the sizes so the largest vertex is drawn at size 30.
plot(g_adult_5.50.5, layout = layout_nicely(g_adult_5.50.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

# Observation IDs held by each of the five Mapper vertices. The .nK objects
# keep the one-element-list form; the .nK.vec objects are plain index vectors.
m_adult_5.50.5.n1 <- m_adult_5.50.5$points_in_vertex[1]
m_adult_5.50.5.n2 <- m_adult_5.50.5$points_in_vertex[2]
m_adult_5.50.5.n3 <- m_adult_5.50.5$points_in_vertex[3]
m_adult_5.50.5.n4 <- m_adult_5.50.5$points_in_vertex[4]
m_adult_5.50.5.n5 <- m_adult_5.50.5$points_in_vertex[5]

m_adult_5.50.5.n1.vec <- unlist(m_adult_5.50.5.n1, use.names = FALSE)
m_adult_5.50.5.n2.vec <- unlist(m_adult_5.50.5.n2, use.names = FALSE)
m_adult_5.50.5.n3.vec <- unlist(m_adult_5.50.5.n3, use.names = FALSE)
m_adult_5.50.5.n4.vec <- unlist(m_adult_5.50.5.n4, use.names = FALSE)
m_adult_5.50.5.n5.vec <- unlist(m_adult_5.50.5.n5, use.names = FALSE)

# Use those IDs as row indices into adult.one_hot_df4 to get the
# observations belonging to each vertex.
tda.m_adult_5.50.5.n1.vec <- adult.one_hot_df4[m_adult_5.50.5.n1.vec, ]
tda.m_adult_5.50.5.n2.vec <- adult.one_hot_df4[m_adult_5.50.5.n2.vec, ]
tda.m_adult_5.50.5.n3.vec <- adult.one_hot_df4[m_adult_5.50.5.n3.vec, ]
tda.m_adult_5.50.5.n4.vec <- adult.one_hot_df4[m_adult_5.50.5.n4.vec, ]
tda.m_adult_5.50.5.n5.vec <- adult.one_hot_df4[m_adult_5.50.5.n5.vec, ]

##*** Adult PCA Mapper: 5 intervals, 40% overlap, 5 bins

# 1-D Mapper on the one-hot Adult data with the first principal component as
# the filter.
m_adult_5.40.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(ts_pca_b$PC1),
  num_intervals = 5,
  percent_overlap = 40,
  num_bins_when_clustering = 5)

# Build and draw the Mapper graph. graph_from_adjacency_matrix() and
# layout_nicely() replace graph.adjacency() and layout.auto(), which igraph
# deprecated in 2.0.0.
g_adult_5.40.5 <- graph_from_adjacency_matrix(m_adult_5.40.5$adjacency,
                                              mode = "undirected")
plot(g_adult_5.40.5, layout = layout_nicely(g_adult_5.40.5))

# Inspect the Mapper output. str() prints the structure itself and returns
# NULL, so wrapping it in head() only printed an extra "NULL" line.
str(m_adult_5.40.5$level_of_vertex)
##  int [1:5] 1 2 3 4 5
str(m_adult_5.40.5$vertices_in_level)
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
str(m_adult_5.40.5$points_in_vertex)
## List of 5
##  $ : int [1:3373] 8 10 12 21 26 46 64 69 73 95 ...
##  $ : int [1:10276] 2 8 10 11 12 15 21 24 26 28 ...
##  $ : int [1:11563] 1 2 4 6 9 16 19 20 23 24 ...
##  $ : int [1:14818] 1 3 4 5 6 9 13 14 16 17 ...
##  $ : int [1:12081] 7 13 14 18 22 25 27 32 36 37 ...
# Redraw the PCA 40% Mapper graph with vertices coloured by filter level and
# sized by the number of observations they contain.
my_resolution <- 100
my_palette <- colorRampPalette(c('red', 'green', 'lightblue'))
my_max <- max(m_adult_5.40.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.40.5$level_of_vertex / my_max

# Bin the normalised levels into my_resolution classes and pick one palette
# colour per vertex.
my_colors <- my_palette(my_resolution)[as.numeric(cut(
  my_vector, breaks = my_resolution))]

# graph_from_adjacency_matrix() / layout_nicely() replace the deprecated
# graph.adjacency() / layout.auto() (igraph 2.0.0).
g_adult_5.40.5 <- graph_from_adjacency_matrix(m_adult_5.40.5$adjacency,
                                              mode = "undirected")
# Number of observations in each vertex.
vertex_size <- lengths(m_adult_5.40.5$points_in_vertex)

# Plot the 40% graph itself: the earlier version drew g_adult_5.50.5 here,
# pairing the 50% graph with the 40% layout, sizes and colours.
# Sizes are log-scaled so the largest vertex is drawn at size 30.
plot(g_adult_5.40.5, layout = layout_nicely(g_adult_5.40.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

# Observation IDs held by each of the five Mapper vertices. The .nK objects
# keep the one-element-list form; the .nK.vec objects are plain index vectors.
m_adult_5.40.5.n1 <- m_adult_5.40.5$points_in_vertex[1]
m_adult_5.40.5.n2 <- m_adult_5.40.5$points_in_vertex[2]
m_adult_5.40.5.n3 <- m_adult_5.40.5$points_in_vertex[3]
m_adult_5.40.5.n4 <- m_adult_5.40.5$points_in_vertex[4]
m_adult_5.40.5.n5 <- m_adult_5.40.5$points_in_vertex[5]

m_adult_5.40.5.n1.vec <- unlist(m_adult_5.40.5.n1, use.names = FALSE)
m_adult_5.40.5.n2.vec <- unlist(m_adult_5.40.5.n2, use.names = FALSE)
m_adult_5.40.5.n3.vec <- unlist(m_adult_5.40.5.n3, use.names = FALSE)
m_adult_5.40.5.n4.vec <- unlist(m_adult_5.40.5.n4, use.names = FALSE)
m_adult_5.40.5.n5.vec <- unlist(m_adult_5.40.5.n5, use.names = FALSE)

# Use those IDs as row indices into adult.one_hot_df4 to get the
# observations belonging to each vertex.
tda.m_adult_5.40.5.n1.vec <- adult.one_hot_df4[m_adult_5.40.5.n1.vec, ]
tda.m_adult_5.40.5.n2.vec <- adult.one_hot_df4[m_adult_5.40.5.n2.vec, ]
tda.m_adult_5.40.5.n3.vec <- adult.one_hot_df4[m_adult_5.40.5.n3.vec, ]
tda.m_adult_5.40.5.n4.vec <- adult.one_hot_df4[m_adult_5.40.5.n4.vec, ]
tda.m_adult_5.40.5.n5.vec <- adult.one_hot_df4[m_adult_5.40.5.n5.vec, ]


##*** Adult KDE Mapper: 5 intervals, 60% overlap, 5 bins

# 1-D Mapper on the one-hot Adult data with the kernel density estimate
# (filter.kde) as the filter.
m_kde_adult_5.60.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(filter.kde),
  num_intervals = 5,
  percent_overlap = 60,
  num_bins_when_clustering = 5)

# Build and draw the Mapper graph. graph_from_adjacency_matrix() and
# layout_nicely() replace graph.adjacency() and layout.auto(), which igraph
# deprecated in 2.0.0.
g_kde_adult_5.60.5 <- graph_from_adjacency_matrix(m_kde_adult_5.60.5$adjacency,
                                                  mode = "undirected")
plot(g_kde_adult_5.60.5, layout = layout_nicely(g_kde_adult_5.60.5))

# Inspect the Mapper output. str() prints the structure itself and returns
# NULL, so wrapping it in head() only printed an extra "NULL" line.
str(m_kde_adult_5.60.5$level_of_vertex)
##  int [1:5] 1 2 3 4 5
str(m_kde_adult_5.60.5$vertices_in_level)
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
str(m_kde_adult_5.60.5$points_in_vertex)
## List of 5
##  $ : int [1:15260] 2 4 5 6 7 9 13 16 19 20 ...
##  $ : int [1:14482] 1 2 4 6 8 9 12 13 20 22 ...
##  $ : int [1:13266] 1 2 8 10 11 12 13 14 27 28 ...
##  $ : int [1:11795] 3 8 10 11 12 14 15 17 27 28 ...
##  $ : int [1:8940] 3 15 17 18 27 32 37 39 49 55 ...
# Redraw the KDE 60% Mapper graph with vertices coloured by filter level and
# sized by the number of observations they contain.
my_resolution <- 100
my_palette <- colorRampPalette(c('red', 'green', 'lightblue'))
my_max <- max(m_kde_adult_5.60.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.60.5$level_of_vertex / my_max

# Bin the normalised levels into my_resolution classes and pick one palette
# colour per vertex.
my_colors <- my_palette(my_resolution)[as.numeric(cut(
  my_vector, breaks = my_resolution))]

# Store the 60% graph under its own name: the earlier version assigned it to
# g_kde_adult_5.50.5, leaving a mislabelled object behind.
# graph_from_adjacency_matrix() / layout_nicely() replace the deprecated
# graph.adjacency() / layout.auto() (igraph 2.0.0).
g_kde_adult_5.60.5 <- graph_from_adjacency_matrix(m_kde_adult_5.60.5$adjacency,
                                                  mode = "undirected")
# Number of observations in each vertex.
vertex_size <- lengths(m_kde_adult_5.60.5$points_in_vertex)

# Log-scale the sizes so the largest vertex is drawn at size 30.
plot(g_kde_adult_5.60.5, layout = layout_nicely(g_kde_adult_5.60.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

# Observation IDs held by each of the five Mapper vertices. The .nK objects
# keep the one-element-list form; the .nK.vec objects are plain index vectors.
m_kde_adult_5.60.5.n1 <- m_kde_adult_5.60.5$points_in_vertex[1]
m_kde_adult_5.60.5.n2 <- m_kde_adult_5.60.5$points_in_vertex[2]
m_kde_adult_5.60.5.n3 <- m_kde_adult_5.60.5$points_in_vertex[3]
m_kde_adult_5.60.5.n4 <- m_kde_adult_5.60.5$points_in_vertex[4]
m_kde_adult_5.60.5.n5 <- m_kde_adult_5.60.5$points_in_vertex[5]

m_kde_adult_5.60.5.n1.vec <- unlist(m_kde_adult_5.60.5.n1, use.names = FALSE)
m_kde_adult_5.60.5.n2.vec <- unlist(m_kde_adult_5.60.5.n2, use.names = FALSE)
m_kde_adult_5.60.5.n3.vec <- unlist(m_kde_adult_5.60.5.n3, use.names = FALSE)
m_kde_adult_5.60.5.n4.vec <- unlist(m_kde_adult_5.60.5.n4, use.names = FALSE)
m_kde_adult_5.60.5.n5.vec <- unlist(m_kde_adult_5.60.5.n5, use.names = FALSE)

# Use those IDs as row indices into adult.one_hot_df4 to get the
# observations belonging to each vertex.
tda.m_kde_adult_5.60.5.n1.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n1.vec, ]
tda.m_kde_adult_5.60.5.n2.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n2.vec, ]
tda.m_kde_adult_5.60.5.n3.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n3.vec, ]
tda.m_kde_adult_5.60.5.n4.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n4.vec, ]
tda.m_kde_adult_5.60.5.n5.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n5.vec, ]



##*** Adult KDE Mapper: 5 intervals, 50% overlap, 5 bins

# 1-D Mapper on the one-hot Adult data with the kernel density estimate
# (filter.kde) as the filter.
m_kde_adult_5.50.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(filter.kde),
  num_intervals = 5,
  percent_overlap = 50,
  num_bins_when_clustering = 5)

# Build and draw the Mapper graph. graph_from_adjacency_matrix() and
# layout_nicely() replace graph.adjacency() and layout.auto(), which igraph
# deprecated in 2.0.0.
g_kde_adult_5.50.5 <- graph_from_adjacency_matrix(m_kde_adult_5.50.5$adjacency,
                                                  mode = "undirected")
plot(g_kde_adult_5.50.5, layout = layout_nicely(g_kde_adult_5.50.5))

# Inspect the Mapper output. str() prints the structure itself and returns
# NULL, so wrapping it in head() only printed an extra "NULL" line.
str(m_kde_adult_5.50.5$level_of_vertex)
##  int [1:5] 1 2 3 4 5
str(m_kde_adult_5.50.5$vertices_in_level)
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
str(m_kde_adult_5.50.5$points_in_vertex)
## List of 5
##  $ : int [1:13387] 2 4 5 6 7 9 16 19 20 21 ...
##  $ : int [1:12638] 1 2 6 8 9 13 20 24 25 26 ...
##  $ : int [1:11634] 1 8 10 11 12 13 14 27 28 30 ...
##  $ : int [1:10038] 3 10 11 12 14 15 27 30 32 34 ...
##  $ : int [1:7540] 3 15 17 18 37 39 56 59 60 65 ...
# Redraw the KDE 50% Mapper graph with vertices coloured by filter level and
# sized by the number of observations they contain.
my_resolution <- 100
my_palette <- colorRampPalette(c('red', 'green', 'lightblue'))
my_max <- max(m_kde_adult_5.50.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.50.5$level_of_vertex / my_max

# Bin the normalised levels into my_resolution classes and pick one palette
# colour per vertex.
my_colors <- my_palette(my_resolution)[as.numeric(cut(
  my_vector, breaks = my_resolution))]

# graph_from_adjacency_matrix() / layout_nicely() replace the deprecated
# graph.adjacency() / layout.auto() (igraph 2.0.0).
g_kde_adult_5.50.5 <- graph_from_adjacency_matrix(m_kde_adult_5.50.5$adjacency,
                                                  mode = "undirected")
# Number of observations in each vertex.
vertex_size <- lengths(m_kde_adult_5.50.5$points_in_vertex)

# Log-scale the sizes so the largest vertex is drawn at size 30.
plot(g_kde_adult_5.50.5, layout = layout_nicely(g_kde_adult_5.50.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

# Observation IDs held by each of the five Mapper vertices. The .nK objects
# keep the one-element-list form; the .nK.vec objects are plain index vectors.
m_kde_adult_5.50.5.n1 <- m_kde_adult_5.50.5$points_in_vertex[1]
m_kde_adult_5.50.5.n2 <- m_kde_adult_5.50.5$points_in_vertex[2]
m_kde_adult_5.50.5.n3 <- m_kde_adult_5.50.5$points_in_vertex[3]
m_kde_adult_5.50.5.n4 <- m_kde_adult_5.50.5$points_in_vertex[4]
m_kde_adult_5.50.5.n5 <- m_kde_adult_5.50.5$points_in_vertex[5]

m_kde_adult_5.50.5.n1.vec <- unlist(m_kde_adult_5.50.5.n1, use.names = FALSE)
m_kde_adult_5.50.5.n2.vec <- unlist(m_kde_adult_5.50.5.n2, use.names = FALSE)
m_kde_adult_5.50.5.n3.vec <- unlist(m_kde_adult_5.50.5.n3, use.names = FALSE)
m_kde_adult_5.50.5.n4.vec <- unlist(m_kde_adult_5.50.5.n4, use.names = FALSE)
m_kde_adult_5.50.5.n5.vec <- unlist(m_kde_adult_5.50.5.n5, use.names = FALSE)

# Use those IDs as row indices into adult.one_hot_df4 to get the
# observations belonging to each vertex.
tda.m_kde_adult_5.50.5.n1.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n1.vec, ]
tda.m_kde_adult_5.50.5.n2.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n2.vec, ]
tda.m_kde_adult_5.50.5.n3.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n3.vec, ]
tda.m_kde_adult_5.50.5.n4.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n4.vec, ]
tda.m_kde_adult_5.50.5.n5.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n5.vec, ]




##*** Adult KDE Mapper: 5 intervals, 40% overlap, 5 bins

# 1-D Mapper on the one-hot Adult data with the kernel density estimate
# (filter.kde) as the filter.
m_kde_adult_5.40.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(filter.kde),
  num_intervals = 5,
  percent_overlap = 40,
  num_bins_when_clustering = 5)

# Build and draw the Mapper graph. graph_from_adjacency_matrix() and
# layout_nicely() replace graph.adjacency() and layout.auto(), which igraph
# deprecated in 2.0.0.
g_kde_adult_5.40.5 <- graph_from_adjacency_matrix(m_kde_adult_5.40.5$adjacency,
                                                  mode = "undirected")
plot(g_kde_adult_5.40.5, layout = layout_nicely(g_kde_adult_5.40.5))

# Inspect the Mapper output. str() prints the structure itself and returns
# NULL, so wrapping it in head() only printed an extra "NULL" line.
str(m_kde_adult_5.40.5$level_of_vertex)
##  int [1:5] 1 2 3 4 5
str(m_kde_adult_5.40.5$vertices_in_level)
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
str(m_kde_adult_5.40.5$points_in_vertex)
## List of 5
##  $ : int [1:11838] 4 5 6 7 9 16 19 20 21 22 ...
##  $ : int [1:11203] 1 2 6 9 13 20 24 25 26 29 ...
##  $ : int [1:10351] 1 8 10 11 12 14 27 28 30 31 ...
##  $ : int [1:8741] 3 10 11 12 14 15 27 30 32 34 ...
##  $ : int [1:6628] 3 15 17 18 37 39 59 60 65 66 ...
# Redraw the KDE 40% Mapper graph with vertices coloured by filter level and
# sized by the number of observations they contain.
my_resolution <- 100
my_palette <- colorRampPalette(c('red', 'green', 'lightblue'))
my_max <- max(m_kde_adult_5.40.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.40.5$level_of_vertex / my_max

# Bin the normalised levels into my_resolution classes and pick one palette
# colour per vertex.
my_colors <- my_palette(my_resolution)[as.numeric(cut(
  my_vector, breaks = my_resolution))]

# graph_from_adjacency_matrix() / layout_nicely() replace the deprecated
# graph.adjacency() / layout.auto() (igraph 2.0.0).
g_kde_adult_5.40.5 <- graph_from_adjacency_matrix(m_kde_adult_5.40.5$adjacency,
                                                  mode = "undirected")
# Number of observations in each vertex.
vertex_size <- lengths(m_kde_adult_5.40.5$points_in_vertex)

# Log-scale the sizes so the largest vertex is drawn at size 30.
plot(g_kde_adult_5.40.5, layout = layout_nicely(g_kde_adult_5.40.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

# Observation IDs held by each of the five Mapper vertices. The .nK objects
# keep the one-element-list form; the .nK.vec objects are plain index vectors.
m_kde_adult_5.40.5.n1 <- m_kde_adult_5.40.5$points_in_vertex[1]
m_kde_adult_5.40.5.n2 <- m_kde_adult_5.40.5$points_in_vertex[2]
m_kde_adult_5.40.5.n3 <- m_kde_adult_5.40.5$points_in_vertex[3]
m_kde_adult_5.40.5.n4 <- m_kde_adult_5.40.5$points_in_vertex[4]
m_kde_adult_5.40.5.n5 <- m_kde_adult_5.40.5$points_in_vertex[5]

m_kde_adult_5.40.5.n1.vec <- unlist(m_kde_adult_5.40.5.n1, use.names = FALSE)
m_kde_adult_5.40.5.n2.vec <- unlist(m_kde_adult_5.40.5.n2, use.names = FALSE)
m_kde_adult_5.40.5.n3.vec <- unlist(m_kde_adult_5.40.5.n3, use.names = FALSE)
m_kde_adult_5.40.5.n4.vec <- unlist(m_kde_adult_5.40.5.n4, use.names = FALSE)
m_kde_adult_5.40.5.n5.vec <- unlist(m_kde_adult_5.40.5.n5, use.names = FALSE)

# Use those IDs as row indices into adult.one_hot_df4 to get the
# observations belonging to each vertex.
tda.m_kde_adult_5.40.5.n1.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n1.vec, ]
tda.m_kde_adult_5.40.5.n2.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n2.vec, ]
tda.m_kde_adult_5.40.5.n3.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n3.vec, ]
tda.m_kde_adult_5.40.5.n4.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n4.vec, ]
tda.m_kde_adult_5.40.5.n5.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n5.vec, ]
library(caret)

# Rebuild the Adult working data sets ahead of the supervised models.
# Column 15 of the raw Adult data is appended to the one-hot frame under the
# name adult_df1; the remaining frames are positional column subsets of it.
adult_df1 <- adult[, 15]
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)

# NOTE(review): the positions below are hard-coded; columns 109 and 110 look
# like the one-hot encoded V15 target -- confirm against names(adult.one_hot_df1).
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64, 65, 66, 109)]
adult.one_hot_df3 <- adult.one_hot_df1[, c(1, 11, 28, 62:66)]
adult.one_hot_df4 <- adult.one_hot_df1[, -c(109, 110)]

# Draw one 70% training partition on the adult_df1 outcome. With
# list = FALSE the selected row numbers come back as a one-column matrix.
trainIndex <- createDataPartition(
  adult.one_hot_df4$adult_df1,
  p = 0.7,
  list = FALSE,
  times = 1
)

head(trainIndex)
##      Resample1
## [1,]         1
## [2,]         2
## [3,]         3
## [4,]         4
## [5,]         5
## [6,]         8
# Rows listed in trainIndex form the training set; all other rows the test set.
adult.one_hot_df4Train <- adult.one_hot_df4[trainIndex, ]
adult.one_hot_df4Test <- adult.one_hot_df4[-trainIndex, ]
#Train Control: k-Fold Cross-validation basis for all models 
fitControl <- trainControl(## 10-fold CV
                           method = "cv",
                           number = 3)
#Non-TDA-Assited
rfGrid<-expand.grid(mtry = (1:20)*50)
# Random forest on the full (non-TDA) training split, tuned over rfGrid with
# the resampling scheme in fitControl and selected on accuracy.
#
# Arguments that train() does not recognise are forwarded to randomForest(),
# whose argument is spelled `importance` (lowercase; R argument names are
# case-sensitive). The earlier `Importance = T` was therefore silently
# swallowed and permutation importance was never computed (importanceSD was
# NULL in the fitted model). `preProcess` is spelled out in full rather than
# relying on partial matching of `preProc`.
#
# Centering/scaling warns about zero-variance columns (constant one-hot
# dummies such as V14.Holand.Netherlands); those warnings are expected here.
adultRfFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
adultRfFit
## Random Forest 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15196, 15195, 15195 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8580265  0.5919831
##    100  0.8547799  0.5844959
##    150  0.8547799  0.5843957
##    200  0.8544289  0.5830623
##    250  0.8542096  0.5826708
##    300  0.8543851  0.5829630
##    350  0.8546922  0.5837050
##    400  0.8547361  0.5846706
##    450  0.8546044  0.5832432
##    500  0.8545605  0.5837993
##    550  0.8546482  0.5841542
##    600  0.8544289  0.5831200
##    650  0.8546483  0.5842098
##    700  0.8547799  0.5837997
##    750  0.8539902  0.5819200
##    800  0.8539463  0.5818111
##    850  0.8540780  0.5818868
##    900  0.8555696  0.5869136
##    950  0.8551309  0.5858845
##   1000  0.8545167  0.5837978
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
adultRfFit$resample
##    Accuracy     Kappa Resample
## 1 0.8567856 0.5897204    Fold1
## 2 0.8585154 0.5920166    Fold3
## 3 0.8587786 0.5942124    Fold2
# Keep the per-fold accuracy of the selected model as a one-column data frame
# (Accuracy is the first column of the resample table).
ad_rf_fit_re <- adultRfFit$resample["Accuracy"]


summary(adultRfFit)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       22793  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           45586  matrix     numeric  
## oob.times       22793  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               22793  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot for the non-TDA random forest, top 25 features.
vip(adultRfFit, num_features = 25) + ggtitle("non-TDA-Assisted: RF")

# Score the held-out test split with the model tuned on the training split.
predictions <- predict(adultRfFit, newdata = adult.one_hot_df4Test)

# Confusion matrix of predicted vs. observed class labels (adult_df1) on the
# test split, used to assess model fit/performance.
rf_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
rf_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6866   831
##      >50K     550  1521
##                                           
##                Accuracy : 0.8586          
##                  95% CI : (0.8516, 0.8655)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5969          
##                                           
##  Mcnemar's Test P-Value : 4.898e-14       
##                                           
##             Sensitivity : 0.9258          
##             Specificity : 0.6467          
##          Pos Pred Value : 0.8920          
##          Neg Pred Value : 0.7344          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7029          
##    Detection Prevalence : 0.7880          
##       Balanced Accuracy : 0.7863          
##                                           
##        'Positive' Class :  <=50K          
## 
rf_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.586200e-01   5.968664e-01   8.515525e-01   8.654729e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  3.576232e-131   4.897695e-14
# Overall test-set accuracy (first element of rf_cf$overall), kept as a named
# length-one vector.
rf_cf_ov_acc <- rf_cf$overall["Accuracy"]
rf_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9258360            0.6466837            0.8920359 
##       Neg Pred Value            Precision               Recall 
##            0.7344278            0.8920359            0.9258360 
##                   F1           Prevalence       Detection Rate 
##            0.9086217            0.7592138            0.7029075 
## Detection Prevalence    Balanced Accuracy 
##            0.7879812            0.7862599
# Precision, recall and F1 for the positive class (elements 5-7 of
# rf_cf$byClass), kept as a named vector.
rf_cf_pre_rec_f1 <- rf_cf$byClass[c("Precision", "Recall", "F1")]

## With TDA PCA filter: 5 intervals, 50% overlap, 5 bins
## Node 1

# Random forest fitted on tda.m_adult_5.50.5.n1.vec (built outside this
# section; presumably the Adult rows assigned to Mapper node 1 -- confirm),
# tuned over rfGrid with the resampling scheme in fitControl.
#
# randomForest()'s argument is `importance` (lowercase; R argument names are
# case-sensitive). The earlier `Importance = T` did not match it and was
# silently ignored, so permutation importance was not computed. `preProcess`
# is spelled out in full rather than relying on partial matching of `preProc`.
Adult_TDA_PC_5.50.5_n1_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n1.vec,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Dominican.Republic, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands,
## V14.Honduras, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
# Print the caret fit for node 1: cross-validated Accuracy/Kappa for each mtry
# in the tuning grid and the mtry that was selected.
# NOTE(review): the printout reports 108 predictors while the grid runs up to
# mtry = 1000, and randomForest warns "invalid mtry: reset to within valid
# range" -- grid values above the predictor count presumably all refit the same
# forest; confirm and consider capping rfGrid at the number of predictors.
Adult_TDA_PC_5.50.5_n1_RfFit0
## Random Forest 
## 
## 4917 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 3278, 3278, 3278 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa     
##     50  0.9713240  0.04590433
##    100  0.9707138  0.07816387
##    150  0.9703071  0.06638083
##    200  0.9713240  0.09238626
##    250  0.9713240  0.09059836
##    300  0.9709172  0.07874975
##    350  0.9711206  0.07980320
##    400  0.9709172  0.09037011
##    450  0.9713240  0.08041228
##    500  0.9709172  0.07901827
##    550  0.9705105  0.07737894
##    600  0.9709172  0.06845593
##    650  0.9703071  0.07662230
##    700  0.9711206  0.09135968
##    750  0.9711206  0.10188522
##    800  0.9711206  0.07980320
##    850  0.9711206  0.09095599
##    900  0.9711206  0.09089212
##    950  0.9709172  0.07921732
##   1000  0.9703071  0.07636039
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Show the per-fold Accuracy and Kappa stored on the fit (one row per fold).
Adult_TDA_PC_5.50.5_n1_RfFit0$resample
##    Accuracy        Kappa Resample
## 1 0.9707138  0.068463305    Fold1
## 2 0.9713240  0.071589553    Fold3
## 3 0.9719341 -0.002339865    Fold2
# Keep only the per-fold Accuracy column (still a one-column data frame) for
# the fold-wise comparison with the other random-forest fit.
# NOTE(review): rows stay in the order caret stored them (Fold1, Fold3, Fold2
# in the printout), and a data-frame subtraction pairs rows by position --
# confirm the other model's resamples are in the same fold order.
ad_tda_pc_5.50.5_n1_rf_fit0_re <- Adult_TDA_PC_5.50.5_n1_RfFit0$resample["Accuracy"]


# List the components stored on the fit (length, class and mode of each).
# NOTE(review): importanceSD is NULL in this listing, which suggests the forest
# was grown without importance = TRUE -- confirm against the train() call.
summary(Adult_TDA_PC_5.50.5_n1_RfFit0)
##                 Length Class      Mode     
## call               5   -none-     call     
## type               1   -none-     character
## predicted       4917   factor     numeric  
## err.rate        1500   -none-     numeric  
## confusion          6   -none-     numeric  
## votes           9834   matrix     numeric  
## oob.times       4917   -none-     numeric  
## classes            2   -none-     character
## importance       108   -none-     numeric  
## importanceSD       0   -none-     NULL     
## localImportance    0   -none-     NULL     
## proximity          0   -none-     NULL     
## ntree              1   -none-     numeric  
## mtry               1   -none-     numeric  
## forest            14   -none-     list     
## y               4917   factor     numeric  
## test               0   -none-     NULL     
## inbag              0   -none-     NULL     
## xNames           108   -none-     character
## problemType        1   -none-     character
## tuneValue          1   data.frame list     
## obsLevels          2   -none-     character
## param              1   -none-     list
# Plot the 25 highest-ranked predictors of the node-1 TDA-assisted RF fit.
# The count is passed by name, and the plot title spelling is corrected
# ("Assisted").
vip(Adult_TDA_PC_5.50.5_n1_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n1_RfFit TDA-Assisted RF")

# Score the held-out test set with the node-1 TDA-assisted random forest.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n1_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted against observed class on the test set, then
# display the table and its summary statistics.
ad_tda_pc_5.50.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n1_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K     43     1
##      >50K    7373  2351
##                                           
##                Accuracy : 0.2451          
##                  95% CI : (0.2366, 0.2537)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0026          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.005798        
##             Specificity : 0.999575        
##          Pos Pred Value : 0.977273        
##          Neg Pred Value : 0.241773        
##              Prevalence : 0.759214        
##          Detection Rate : 0.004402        
##    Detection Prevalence : 0.004505        
##       Balanced Accuracy : 0.502687        
##                                           
##        'Positive' Class :  <=50K          
## 
# Display the test-set confusion matrix again (same object, same output as the
# previous print).
ad_tda_pc_5.50.5_n1_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K     43     1
##      >50K    7373  2351
##                                           
##                Accuracy : 0.2451          
##                  95% CI : (0.2366, 0.2537)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0026          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.005798        
##             Specificity : 0.999575        
##          Pos Pred Value : 0.977273        
##          Neg Pred Value : 0.241773        
##              Prevalence : 0.759214        
##          Detection Rate : 0.004402        
##    Detection Prevalence : 0.004505        
##       Balanced Accuracy : 0.502687        
##                                           
##        'Positive' Class :  <=50K          
## 
# Show the overall statistics of the confusion matrix as a named vector
# (Accuracy, Kappa, accuracy interval bounds, p-values).
ad_tda_pc_5.50.5_n1_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.245085995    0.002595523    0.236581718    0.253742513    0.759213759 
## AccuracyPValue  McnemarPValue 
##    1.000000000    0.000000000
# Keep the test-set Accuracy (the first entry of the overall statistics) as a
# named length-one vector.
ad_tda_pc_5.50.5_n1_rf_cf0_ov_acc <- ad_tda_pc_5.50.5_n1_rf_cf0$overall["Accuracy"]
# Show the per-class statistics (sensitivity, specificity, precision, ...).
ad_tda_pc_5.50.5_n1_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##          0.005798274          0.999574830          0.977272727 
##       Neg Pred Value            Precision               Recall 
##          0.241772933          0.977272727          0.005798274 
##                   F1           Prevalence       Detection Rate 
##          0.011528150          0.759213759          0.004402129 
## Detection Prevalence    Balanced Accuracy 
##          0.004504505          0.502686552
# Keep Precision, Recall and F1 (entries 5 to 7 of the per-class statistics).
ad_tda_pc_5.50.5_n1_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n1_rf_cf0$byClass[c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF classifiers

### Fold-wise accuracy difference (3-fold)

# ad_rf_fit_re minus the node-1 TDA-assisted fold accuracies, row by row;
# a negative entry means the TDA-assisted fit scored higher in that row.
diff_tda_pca_5.50.5_rf_n1_3_fold <- ad_rf_fit_re - ad_tda_pc_5.50.5_n1_rf_fit0_re
diff_tda_pca_5.50.5_rf_n1_3_fold
##     Accuracy
## 1 -0.1139283
## 2 -0.1128086
## 3 -0.1131555
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold-wise differences; -0.01 and 0.01 are the
# bounds of the region of practical equivalence (reported as probRope).
bst_tda_pca_5.50.5_rf.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the Bayesian sign test.
# probRight is 0 for these folds, so the ratio evaluates to Inf.
bst_tda_pca_5.50.5_rf.n1_3_fold_odds.left <-
  bst_tda_pca_5.50.5_rf.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n1_3_fold[["probRight"]]
bst_tda_pca_5.50.5_rf.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the fold-wise differences, using the same
# [-0.01, 0.01] equivalence bounds.
bsr_tda_pca_5.50.5_rf.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n1_3_fold
## $winLeft
## [1] 0.9909333
## 
## $winRope
## [1] 0.009066667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold-wise differences, using the same
# [-0.01, 0.01] equivalence bounds.
# NOTE(review): the second argument (0.1) is presumably the correlation between
# folds; the usual choice for k-fold CV is 1/k, which would be 1/3 for the
# 3-fold resampling reported by the fit -- confirm the intended value.
bct_tda_pca_5.50.5_rf.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n1_3_fold
## $left
## [1] 0.9999932
## 
## $rope
## [1] 2.039677e-06
## 
## $right
## [1] 4.802523e-06
# ROPE plot of the fold-wise differences against the [-0.01, 0.01] interval.
plot(
  rope(
    as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),
    c(-0.01, 0.01)
  )
)

# Bayes factor (left disabled)
#bf_tda_pca_5.50.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold))
#bf_tda_pca_5.50.5_rf.n1_3_fold

# One-sample t-test of the fold-wise differences against a mean of zero.
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold)
## t = -342.36, df = 2, p-value = 8.532e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1147213 -0.1118736
## sample estimates:
##  mean of x 
## -0.1132974
### Test-set accuracy difference
# rf_cf_ov_acc minus the node-1 TDA-assisted test accuracy; the result is a
# single named value, as the printed output shows.
diff_tda_pca_5.50.5_rf.n1_test <- rf_cf_ov_acc - ad_tda_pc_5.50.5_n1_rf_cf0_ov_acc
diff_tda_pca_5.50.5_rf.n1_test
## Accuracy 
## 0.613534
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set difference with [-0.01, 0.01] bounds.
# NOTE(review): the input is a single value, so the result presumably reflects
# the test's prior as much as the data -- confirm this is intended.
bst_tda_pca_5.50.5_rf.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the test-set Bayesian sign test.
bst_tda_pca_5.50.5_rf.n1_test_odds.left <-
  bst_tda_pca_5.50.5_rf.n1_test[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n1_test[["probRight"]]
bst_tda_pca_5.50.5_rf.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference, using the same
# [-0.01, 0.01] equivalence bounds.
bsr_tda_pca_5.50.5_rf.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1559667
## 
## $winRight
## [1] 0.8440333
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): this returns NA for left/rope/right (see output); the input is
# a single value, so presumably its standard deviation is undefined -- the
# test needs more than one difference to be informative.
bct_tda_pca_5.50.5_rf.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n1_test))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n1_test))
#bf_tda_pca_5.50.5_rf.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n1_test))

##Node2

# Tune a random forest (caret method "rf") on the node-2 TDA subset.
# Predictors are centered and scaled; the resampling scheme and the mtry grid
# come from fitControl and rfGrid, and the model is selected on Accuracy.
#
# The former `Importance = T` argument was dropped: randomForest's option is
# the lowercase `importance`, so the capitalised name was silently ignored
# (the fitted model's importanceSD is NULL). Removing it leaves the fit
# unchanged.
# NOTE(review): if permutation importance is wanted, pass `importance = TRUE`
# and confirm the downstream vip() call still works with the resulting
# importance table.
# NOTE(review): rfGrid contains mtry values above the number of predictors
# reported for this fit (108); randomForest warns and resets those, so those
# grid rows look redundant -- consider trimming rfGrid.
Adult_TDA_PC_5.50.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
# Print the caret tuning summary (resampled Accuracy/Kappa per mtry value and
# the selected mtry) for the node-2 fit.
Adult_TDA_PC_5.50.5_n2_RfFit0
## Random Forest 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8136, 8138, 8138 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.7307869  0.4603031
##    100  0.7261173  0.4503848
##    150  0.7233314  0.4450216
##    200  0.7255440  0.4493088
##    250  0.7248885  0.4478596
##    300  0.7252975  0.4487869
##    350  0.7252981  0.4488116
##    400  0.7252979  0.4488017
##    450  0.7241507  0.4463752
##    500  0.7246428  0.4471666
##    550  0.7251339  0.4483310
##    600  0.7257078  0.4496379
##    650  0.7243152  0.4468307
##    700  0.7253800  0.4490015
##    750  0.7236593  0.4456333
##    800  0.7248064  0.4478642
##    850  0.7263631  0.4508071
##    900  0.7259537  0.4500862
##    950  0.7262809  0.4507923
##   1000  0.7234957  0.4450489
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy and Kappa of the selected model.
# NOTE(review): rows are not necessarily in fold order, and later differences
# against the baseline are taken row by row -- confirm both use the same order.
Adult_TDA_PC_5.50.5_n2_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7385749 0.4752125    Fold1
## 2 0.7278761 0.4534681    Fold3
## 3 0.7259095 0.4522287    Fold2
# Keep only the per-fold Accuracy column (still a one-column data frame) for
# the paired comparison against the baseline RF.
ad_tda_pc_5.50.5_n2_rf_fit0_re <-
  Adult_TDA_PC_5.50.5_n2_RfFit0$resample["Accuracy"]

# Overview of the components stored in the fitted model.
summary(Adult_TDA_PC_5.50.5_n2_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       12206  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           24412  matrix     numeric  
## oob.times       12206  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               12206  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot for the 25 top-ranked predictors of the node-2 fit.
# Fixes the "Assited" typo in the plot title.
vip(Adult_TDA_PC_5.50.5_n2_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n2_RfFit TDA-Assisted RF")

# Score the held-out test set with the node-2 TDA-assisted random forest.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n2_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predictions against the true test labels and print the
# resulting confusion matrix with its summary statistics.
ad_tda_pc_5.50.5_n2_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n2_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1936     8
##      >50K    5480  2344
##                                           
##                Accuracy : 0.4382          
##                  95% CI : (0.4283, 0.4481)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1436          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2611          
##             Specificity : 0.9966          
##          Pos Pred Value : 0.9959          
##          Neg Pred Value : 0.2996          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1982          
##    Detection Prevalence : 0.1990          
##       Balanced Accuracy : 0.6288          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion-matrix object a second time; it has not been
# reassigned since the previous print, so the output is identical.
ad_tda_pc_5.50.5_n2_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1936     8
##      >50K    5480  2344
##                                           
##                Accuracy : 0.4382          
##                  95% CI : (0.4283, 0.4481)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1436          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2611          
##             Specificity : 0.9966          
##          Pos Pred Value : 0.9959          
##          Neg Pred Value : 0.2996          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1982          
##    Detection Prevalence : 0.1990          
##       Balanced Accuracy : 0.6288          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the test-set confusion matrix (Accuracy, Kappa,
# accuracy bounds, null accuracy and the two p-values).
ad_tda_pc_5.50.5_n2_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.4381654      0.1435942      0.4282945      0.4480732      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# Store the overall test-set accuracy (first entry of $overall, selected here
# by its name), then show the by-class statistics.
ad_tda_pc_5.50.5_n2_rf_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n2_rf_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n2_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.2610572            0.9965986            0.9958848 
##       Neg Pred Value            Precision               Recall 
##            0.2995910            0.9958848            0.2610572 
##                   F1           Prevalence       Detection Rate 
##            0.4136752            0.7592138            0.1981982 
## Detection Prevalence    Balanced Accuracy 
##            0.1990172            0.6288279
# Precision, Recall and F1 are entries 5 to 7 of $byClass; select them by name.
ad_tda_pc_5.50.5_n2_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n2_rf_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Row-wise difference of per-fold accuracies: ad_rf_fit_re (presumably the
# non-TDA baseline RF) minus the node-2 TDA-assisted RF.
diff_tda_pca_5.50.5_rf_n2_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.50.5_n2_rf_fit0_re
diff_tda_pca_5.50.5_rf_n2_3_fold
##    Accuracy
## 1 0.1182106
## 2 0.1306393
## 3 0.1328691
## Bayesian Tests 3-fold diff

# Bayesian sign test on the node-2 3-fold accuracy differences.
# The trailing -0.01 / 0.01 are presumably the ROPE bounds -- confirm
# against the BayesianSignTest definition.
bst_tda_pca_5.50.5_rf.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" versus "right" from the Bayesian sign test:
# probLeft divided by probRight.
bst_tda_pca_5.50.5_rf.n2_3_fold_odds.left <-
  bst_tda_pca_5.50.5_rf.n2_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n2_3_fold[["probRight"]]
bst_tda_pca_5.50.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the node-2 3-fold accuracy differences.
# The trailing -0.01 / 0.01 are presumably the ROPE bounds -- confirm
# against the BayesianSignedRank definition.
bsr_tda_pca_5.50.5_rf.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.008933333
## 
## $winRight
## [1] 0.9910667
# Bayesian correlated t-test on the node-2 3-fold accuracy differences.
# Arguments after the data are 0.1, -0.01, 0.01 (presumably the assumed
# correlation and the ROPE bounds -- confirm against the function definition).
bct_tda_pca_5.50.5_rf.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n2_3_fold
## $left
## [1] 0.0007344378
## 
## $rope
## [1] 0.0002711296
## 
## $right
## [1] 0.9989944
# ROPE plot for the node-2 3-fold accuracy differences, using the
# range [-0.01, 0.01].
plot(
  rope(
    as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_pca_5.50.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold))
#bf_tda_pca_5.50.5_rf.n2_3_fold

# One-sample t-test of the node-2 3-fold accuracy differences against a
# mean of 0 (the t.test default).
t.test(x = as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold)
## t = 27.902, df = 2, p-value = 0.001282
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1076188 0.1468605
## sample estimates:
## mean of x 
## 0.1272397
### Test set diff
# Difference in overall test-set accuracy: rf_cf_ov_acc minus the node-2
# TDA-assisted RF accuracy. This is a single value, not a per-fold vector.
diff_tda_pca_5.50.5_rf.n2_test <-
  rf_cf_ov_acc - ad_tda_pc_5.50.5_n2_rf_cf0_ov_acc
diff_tda_pca_5.50.5_rf.n2_test
##  Accuracy 
## 0.4204545
## Bayesian Tests Test set diff

# Bayesian sign test on the node-2 test-set accuracy difference.
# The trailing -0.01 / 0.01 are presumably the ROPE bounds -- confirm
# against the BayesianSignTest definition.
# NOTE(review): the input here is a single difference, so the result rests
# on one observation.
bst_tda_pca_5.50.5_rf.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test:
# probLeft divided by probRight.
bst_tda_pca_5.50.5_rf.n2_test_odds.left <-
  bst_tda_pca_5.50.5_rf.n2_test[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n2_test[["probRight"]]
bst_tda_pca_5.50.5_rf.n2_test_odds.left
## [1] 0
# Bayesian signed-rank test on the node-2 test-set accuracy difference.
# The trailing -0.01 / 0.01 are presumably the ROPE bounds -- confirm
# against the BayesianSignedRank definition.
bsr_tda_pca_5.50.5_rf.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1556333
## 
## $winRight
## [1] 0.8443667
# Bayesian correlated t-test on the node-2 test-set accuracy difference.
# Arguments after the data are 0.1, -0.01, 0.01 (presumably the assumed
# correlation and the ROPE bounds -- confirm against the function definition).
# NOTE(review): the input is a single difference and the printed result is NA
# for left/rope/right, so this call carries no information as written.
bct_tda_pca_5.50.5_rf.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(as.matrix(diff_tda_pca_5.50.5_rf.n2_test),c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n2_test)) #bf_tda_pca_5.50.5_rf.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n2_test))

##Node3

# Fit the node-3 TDA-assisted random forest with caret: the outcome adult_df1
# (coerced to a factor) is modelled on every other column of
# tda.m_adult_5.50.5.n3.vec, predictors are centered and scaled, and mtry is
# tuned over rfGrid using the resampling scheme in fitControl, selecting on
# Accuracy.
#
# `importance = TRUE` is forwarded through train()'s `...` to randomForest().
# It was previously written `Importance = T`; R argument names are
# case-sensitive, so that spelling matched no randomForest() argument and was
# silently ignored (no permutation importance was computed). Note that turning
# it on increases fitting time and changes the importance measure available to
# downstream variable-importance plots.
#
# NOTE(review): rfGrid is defined elsewhere. The "invalid mtry: reset to within
# valid range" warnings in the rendered output suggest it contains mtry values
# above the number of predictors -- consider capping the grid.
Adult_TDA_PC_5.50.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
# Print the fitted caret model: sample/predictor counts, the resampling scheme,
# accuracy and kappa for each mtry in the grid, and the selected mtry.
Adult_TDA_PC_5.50.5_n3_RfFit0
## Random Forest 
## 
## 13240 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8827, 8827, 8826 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8435796  0.5009515
##    100  0.8395766  0.4915433
##    150  0.8391235  0.4904715
##    200  0.8403321  0.4928573
##    250  0.8395013  0.4913444
##    300  0.8392746  0.4902961
##    350  0.8395012  0.4904205
##    400  0.8392745  0.4899087
##    450  0.8395012  0.4913642
##    500  0.8388215  0.4888537
##    550  0.8383684  0.4868825
##    600  0.8395766  0.4924767
##    650  0.8397279  0.4912020
##    700  0.8402565  0.4938746
##    750  0.8398033  0.4912664
##    800  0.8389725  0.4894692
##    850  0.8397278  0.4925311
##    900  0.8398789  0.4922070
##    950  0.8397277  0.4920199
##   1000  0.8393502  0.4911477
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy and Kappa of the selected model (one row per CV fold).
# The rows are not necessarily listed in fold order, which matters if they are
# later paired row-by-row with another model's folds.
Adult_TDA_PC_5.50.5_n3_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8391117 0.4901986    Fold1
## 2 0.8495696 0.5149349    Fold3
## 3 0.8420576 0.4977208    Fold2
# Keep only the per-fold Accuracy column (the first column of $resample) as a
# one-column data frame for the paired 3-fold comparisons.
ad_tda_pc_5.50.5_n3_rf_fit0_re <-
  Adult_TDA_PC_5.50.5_n3_RfFit0[["resample"]]["Accuracy"]


# Structural summary of the fit (lists the components of the fitted forest)
summary(
  Adult_TDA_PC_5.50.5_n3_RfFit0
)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       13240  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           26480  matrix     numeric  
## oob.times       13240  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               13240  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot for the 25 most important predictors of the node-3
# fit. The title previously misspelled "Assisted" as "Assited".
vip(Adult_TDA_PC_5.50.5_n3_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n3_RfFit TDA-Assisted RF")

# Predict the outcome for the test data using the node-3 model fitted on the
# training data.
# NOTE(review): the rendered test accuracy is far below the cross-validated
# accuracy; confirm adult.one_hot_df4Test carries the same columns/encoding as
# the training frame.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n3_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the true test labels, used to
# assess model performance on the test data.
ad_tda_pc_5.50.5_n3_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test[["adult_df1"]])
)
ad_tda_pc_5.50.5_n3_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   4827  1137
##      >50K    2589  1215
##                                           
##                Accuracy : 0.6186          
##                  95% CI : (0.6088, 0.6282)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1383          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6509          
##             Specificity : 0.5166          
##          Pos Pred Value : 0.8094          
##          Neg Pred Value : 0.3194          
##              Prevalence : 0.7592          
##          Detection Rate : 0.4942          
##    Detection Prevalence : 0.6106          
##       Balanced Accuracy : 0.5837          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the node-3 confusion matrix object ad_tda_pc_5.50.5_n3_rf_cf0.
# NOTE(review): if this object was already printed right after it was created,
# this second print is redundant and could be dropped.
ad_tda_pc_5.50.5_n3_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   4827  1137
##      >50K    2589  1215
##                                           
##                Accuracy : 0.6186          
##                  95% CI : (0.6088, 0.6282)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1383          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6509          
##             Specificity : 0.5166          
##          Pos Pred Value : 0.8094          
##          Neg Pred Value : 0.3194          
##              Prevalence : 0.7592          
##          Detection Rate : 0.4942          
##    Detection Prevalence : 0.6106          
##       Balanced Accuracy : 0.5837          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-3 confusion matrix as a named numeric vector
# (Accuracy, Kappa, accuracy bounds, null accuracy and the two p-values).
ad_tda_pc_5.50.5_n3_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   6.185504e-01   1.383175e-01   6.088322e-01   6.281978e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  6.677915e-125
# Test-set accuracy of the node-3 model: the first element of $overall, which
# is the one named "Accuracy".
ad_tda_pc_5.50.5_n3_rf_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n3_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...)
ad_tda_pc_5.50.5_n3_rf_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.6508900            0.5165816            0.8093561 
##       Neg Pred Value            Precision               Recall 
##            0.3194006            0.8093561            0.6508900 
##                   F1           Prevalence       Detection Rate 
##            0.7215247            0.7592138            0.4941646 
## Detection Prevalence    Balanced Accuracy 
##            0.6105651            0.5837358
# Precision, Recall and F1 -- elements 5 to 7 of the byClass vector
ad_tda_pc_5.50.5_n3_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n3_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Per-fold accuracy gap: ad_rf_fit_re minus the node-3 TDA-assisted RF folds.
# The subtraction pairs rows by position, so both data frames must list the
# folds in the same order (TODO confirm the row order of ad_rf_fit_re).
diff_tda_pca_5.50.5_rf_n3_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.50.5_n3_rf_fit0_re
diff_tda_pca_5.50.5_rf_n3_3_fold
##      Accuracy
## 1 0.017673858
## 2 0.008945847
## 3 0.016721069
## Bayesian Tests 3-fold diff

# Bayesian sign test on the node-3 3-fold accuracy differences, called with
# -0.01 and 0.01 as trailing arguments (presumably the ROPE bounds;
# BayesianSignTest is defined outside this section -- confirm).
bst_tda_pca_5.50.5_rf.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds "left": ratio of the sign test's probLeft to its probRight
bst_tda_pca_5.50.5_rf.n3_3_fold_odds.left <-
  bst_tda_pca_5.50.5_rf.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n3_3_fold[["probRight"]]
bst_tda_pca_5.50.5_rf.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same 3-fold differences, with the same
# -0.01 / 0.01 trailing arguments as the sign test.
bsr_tda_pca_5.50.5_rf.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.3009667
## 
## $winRight
## [1] 0.6990333
# Bayesian Correlated Test
# Correlated Bayesian t-test on the node-3 3-fold accuracy differences, called
# with 0.1, -0.01 and 0.01 as trailing arguments (presumably the correlation
# and the ROPE bounds; correlatedBayesianTtest is defined outside this section).
bct_tda_pca_5.50.5_rf.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
# Print the node-3 result computed above. This line previously printed
# bct_tda_pca_5.50.5_rf.n2_3_fold (the node-2 object), so any rendered output
# captured before this fix shows the node-2 values and must be regenerated.
bct_tda_pca_5.50.5_rf.n3_3_fold
## $left
## [1] 0.0007344378
## 
## $rope
## [1] 0.0002711296
## 
## $right
## [1] 0.9989944
# ROPE plot for the node-3 3-fold accuracy differences; c(-0.01, 0.01) is
# passed as the second argument to rope().
plot(
  rope(
    as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),
    c(-0.01, 0.01)
  )
)

# Bayes factor t-test (currently disabled)
#bf_tda_pca_5.50.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold))
#bf_tda_pca_5.50.5_rf.n3_3_fold

# One-sample t-test of the 3-fold differences (default null: mean equal to 0)
t.test(
  as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold)
## t = 5.2263, df = 2, p-value = 0.03472
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.002553289 0.026340560
## sample estimates:
##  mean of x 
## 0.01444692
### Test set diff
# Test-set accuracy gap: rf_cf_ov_acc minus the node-3 TDA-assisted RF accuracy.
# A positive value means rf_cf_ov_acc is the larger of the two.
diff_tda_pca_5.50.5_rf.n3_test <-
  rf_cf_ov_acc - ad_tda_pc_5.50.5_n3_rf_cf0_ov_acc
diff_tda_pca_5.50.5_rf.n3_test
##  Accuracy 
## 0.2400696
## Bayesian Tests Test set diff

# Bayesian sign test on the node-3 test-set accuracy difference, called with
# -0.01 and 0.01 as trailing arguments (presumably the ROPE bounds;
# BayesianSignTest is defined outside this section -- confirm).
bst_tda_pca_5.50.5_rf.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds "left": ratio of the sign test's probLeft to its probRight
bst_tda_pca_5.50.5_rf.n3_test_odds.left <-
  bst_tda_pca_5.50.5_rf.n3_test[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n3_test[["probRight"]]
bst_tda_pca_5.50.5_rf.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test on the same test-set difference, with the same
# -0.01 / 0.01 trailing arguments as the sign test.
bsr_tda_pca_5.50.5_rf.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1568667
## 
## $winRight
## [1] 0.8431333
# Correlated Bayesian t-test on the node-3 test-set difference, called with
# 0.1, -0.01 and 0.01 as trailing arguments (presumably the correlation and the
# ROPE bounds; correlatedBayesianTtest is defined outside this section).
# NOTE(review): the input is a single accuracy difference and the rendered
# result is NA for left/rope/right -- presumably because a spread cannot be
# estimated from one observation; confirm against the function definition.
bct_tda_pca_5.50.5_rf.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(as.matrix(diff_tda_pca_5.50.5_rf.n3_test),c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n3_test)) #bf_tda_pca_5.50.5_rf.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n3_test))

##Node4

# Fit the TDA-assisted random forest for node 4 with train(): the response is
# adult_df1 (as a factor), all other columns of tda.m_adult_5.50.5.n4.vec are
# predictors, predictors are centered and scaled, and the tuning value is
# chosen by Accuracy using fitControl and rfGrid (both defined elsewhere).
#
# `importance = TRUE` is forwarded to randomForest(). R argument names are
# case-sensitive, so the previous `Importance = T` was not matched and was
# silently dropped (the fitted model's summary shows importanceSD as NULL).
#
# NOTE(review): the knit log repeats "invalid mtry: reset to within valid
# range"; rfGrid probably lists mtry values above the number of predictors —
# confirm and trim the grid.
Adult_TDA_PC_5.50.5_n4_RfFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n4.vec, 
                 importance = TRUE,
                 method = 'rf', 
                 trControl = fitControl,
                 tuneGrid = rfGrid, preProc = c('center','scale'), 
                 metric='Accuracy')
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Print the fitted node-4 model: resampling setup and accuracy per mtry value
Adult_TDA_PC_5.50.5_n4_RfFit0
## Random Forest 
## 
## 16700 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11134, 11133, 11133 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.9517965  0.3550857
##    100  0.9519760  0.3657386
##    150  0.9515569  0.3625767
##    200  0.9514971  0.3585936
##    250  0.9517366  0.3591983
##    300  0.9514371  0.3635162
##    350  0.9517365  0.3617620
##    400  0.9516168  0.3629410
##    450  0.9516767  0.3660076
##    500  0.9515569  0.3616388
##    550  0.9513773  0.3599246
##    600  0.9514372  0.3593316
##    650  0.9514371  0.3602456
##    700  0.9517965  0.3630400
##    750  0.9518563  0.3640973
##    800  0.9512575  0.3598879
##    850  0.9519162  0.3643928
##    900  0.9513773  0.3579457
##    950  0.9513773  0.3598592
##   1000  0.9514970  0.3638851
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 100.
# Per-fold resampling results of the node-4 model
Adult_TDA_PC_5.50.5_n4_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.9518505 0.3617923    Fold1
## 2 0.9538351 0.4020191    Fold3
## 3 0.9502425 0.3334044    Fold2
# Keep only the per-fold Accuracy column (first column), as a one-column data frame
ad_tda_pc_5.50.5_n4_rf_fit0_re <-
  Adult_TDA_PC_5.50.5_n4_RfFit0$resample["Accuracy"]


# Component overview of the fitted model
summary(object = Adult_TDA_PC_5.50.5_n4_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       16700  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           33400  matrix     numeric  
## oob.times       16700  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               16700  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the node-4 TDA-assisted random forest, limited
# to 25 features; the title typo "Assited" is corrected to "Assisted".
vip(Adult_TDA_PC_5.50.5_n4_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n4_RfFit TDA-Assisted RF")

# Predict test-set classes with the node-4 TDA-assisted random forest
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n4_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of those predictions against the test-set labels
ad_tda_pc_5.50.5_n4_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7414  1657
##      >50K       2   695
##                                           
##                Accuracy : 0.8302          
##                  95% CI : (0.8226, 0.8376)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3886          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9997          
##             Specificity : 0.2955          
##          Pos Pred Value : 0.8173          
##          Neg Pred Value : 0.9971          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7590          
##    Detection Prevalence : 0.9286          
##       Balanced Accuracy : 0.6476          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same confusion-matrix object; it is not reassigned
# between the two prints, so the output repeats.
ad_tda_pc_5.50.5_n4_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7414  1657
##      >50K       2   695
##                                           
##                Accuracy : 0.8302          
##                  95% CI : (0.8226, 0.8376)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3886          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9997          
##             Specificity : 0.2955          
##          Pos Pred Value : 0.8173          
##          Neg Pred Value : 0.9971          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7590          
##    Detection Prevalence : 0.9286          
##       Balanced Accuracy : 0.6476          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-4 test-set confusion matrix
ad_tda_pc_5.50.5_n4_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.301597e-01   3.885773e-01   8.225640e-01   8.375580e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   2.290367e-65   0.000000e+00
# Keep the overall test-set accuracy (first entry of $overall, named "Accuracy")
ad_tda_pc_5.50.5_n4_rf_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n4_rf_cf0$overall["Accuracy"]
# Per-class statistics of the node-4 test-set confusion matrix
ad_tda_pc_5.50.5_n4_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9997303            0.2954932            0.8173300 
##       Neg Pred Value            Precision               Recall 
##            0.9971306            0.8173300            0.9997303 
##                   F1           Prevalence       Detection Rate 
##            0.8993753            0.7592138            0.7590090 
## Detection Prevalence    Balanced Accuracy 
##            0.9286446            0.6476118
# Keep Precision, Recall and F1 (entries 5 to 7 of $byClass)
ad_tda_pc_5.50.5_n4_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n4_rf_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Per-fold accuracy differences: ad_rf_fit_re minus the node-4 TDA-assisted
# per-fold accuracies, subtracted row by row.
# NOTE(review): rows are paired by position, and the node-4 resample table
# lists its folds as Fold1, Fold3, Fold2 — confirm ad_rf_fit_re uses the same
# fold order.
diff_tda_pca_5.50.5_rf_n4_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.50.5_n4_rf_fit0_re
diff_tda_pca_5.50.5_rf_n4_3_fold
##      Accuracy
## 1 -0.09506495
## 2 -0.09531970
## 3 -0.09146387
## Bayesian Tests 3-fold diff

# Bayesian sign test on the node-4 3-fold accuracy differences, called with
# the bounds -0.01 and 0.01
bst_tda_pca_5.50.5_rf.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft divided by probRight; Inf when probRight is 0)
bst_tda_pca_5.50.5_rf.n4_3_fold_odds.left <-
  bst_tda_pca_5.50.5_rf.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n4_3_fold[["probRight"]]
bst_tda_pca_5.50.5_rf.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the node-4 3-fold accuracy differences,
# called with the bounds -0.01 and 0.01
bsr_tda_pca_5.50.5_rf.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n4_3_fold
## $winLeft
## [1] 0.9912333
## 
## $winRope
## [1] 0.008766667
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test on the node-4 3-fold accuracy differences,
# called with 0.1 and the bounds -0.01 and 0.01.
# NOTE(review): if the second argument is the between-fold correlation
# (commonly 1/k for k-fold resampling), 3-fold resampling would suggest 1/3
# rather than 0.1 — confirm against the function's definition.
bct_tda_pca_5.50.5_rf.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n4_3_fold
## $left
## [1] 0.9998534
## 
## $rope
## [1] 5.095638e-05
## 
## $right
## [1] 9.560297e-05
# ROPE plot of the node-4 3-fold accuracy differences, ROPE bounds -0.01 / 0.01
plot(
  rope(
    as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),
    range = c(-0.01, 0.01)
  )
)

# BayesFactor (disabled)
#bf_tda_pca_5.50.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold))
#bf_tda_pca_5.50.5_rf.n4_3_fold

# One-sample t-test of the node-4 3-fold accuracy differences (H0: mean is 0)
t.test(x = as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold)
## t = -75.462, df = 2, p-value = 0.0001756
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.09930627 -0.08859275
## sample estimates:
##   mean of x 
## -0.09394951
### Test set diff
# Test-set accuracy gap: rf_cf_ov_acc minus the node-4 TDA-assisted accuracy.
# NOTE(review): rf_cf_ov_acc is presumably the non-TDA random forest's
# test accuracy — confirm where it is defined.
diff_tda_pca_5.50.5_rf.n4_test <-
  rf_cf_ov_acc - ad_tda_pc_5.50.5_n4_rf_cf0_ov_acc
diff_tda_pca_5.50.5_rf.n4_test
##   Accuracy 
## 0.02846028
## Bayesian Tests Test set diff

# Bayesian sign test on the node-4 test-set accuracy difference, called with
# the bounds -0.01 and 0.01.
# NOTE(review): the input holds a single difference, so the probabilities are
# likely driven by the test's prior rather than the data — confirm intent.
bst_tda_pca_5.50.5_rf.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft divided by probRight)
bst_tda_pca_5.50.5_rf.n4_test_odds.left <-
  bst_tda_pca_5.50.5_rf.n4_test[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n4_test[["probRight"]]
bst_tda_pca_5.50.5_rf.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the node-4 test-set accuracy difference,
# called with the bounds -0.01 and 0.01
bsr_tda_pca_5.50.5_rf.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1595667
## 
## $winRight
## [1] 0.8404333
# Correlated Bayesian t-test on the node-4 test-set accuracy difference,
# called with 0.1 and the bounds -0.01 and 0.01.
# NOTE(review): the knit output for this call is NA for left/rope/right;
# presumably no spread can be estimated from a single difference — confirm
# whether this test should be run on the test-set difference at all.
bct_tda_pca_5.50.5_rf.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n4_test))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n4_test))
#bf_tda_pca_5.50.5_rf.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n4_test))

##Node5

# Fit the TDA-assisted random forest for node 5 with train(): the response is
# adult_df1 (as a factor), all other columns of tda.m_adult_5.50.5.n5.vec are
# predictors, predictors are centered and scaled, and the tuning value is
# chosen by Accuracy using fitControl and rfGrid (both defined elsewhere).
#
# `importance = TRUE` is forwarded to randomForest(). R argument names are
# case-sensitive and randomForest()'s argument is lowercase `importance`, so
# the previous `Importance = T` was not matched by that argument.
#
# NOTE(review): the knit log repeats "invalid mtry: reset to within valid
# range"; rfGrid probably lists mtry values above the number of predictors —
# confirm and trim the grid.
Adult_TDA_PC_5.50.5_n5_RfFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n5.vec, 
                 importance = TRUE,
                 method = 'rf', 
                 trControl = fitControl,
                 tuneGrid = rfGrid, preProc = c('center','scale'), 
                 metric='Accuracy')
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
Adult_TDA_PC_5.50.5_n5_RfFit0
## Random Forest 
## 
## 14404 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9602, 9602, 9604 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.9981949  0.2132988
##    100  0.9980561  0.1972965
##    150  0.9980561  0.1972965
##    200  0.9979867  0.1908911
##    250  0.9979867  0.1908911
##    300  0.9979867  0.1908911
##    350  0.9980561  0.1972965
##    400  0.9979867  0.1908911
##    450  0.9979173  0.1439117
##    500  0.9980561  0.1972965
##    550  0.9981255  0.2375846
##    600  0.9981949  0.2449723
##    650  0.9980561  0.1972965
##    700  0.9980561  0.1972965
##    750  0.9979867  0.1908911
##    800  0.9979867  0.1908911
##    850  0.9980561  0.2311793
##    900  0.9979867  0.1908911
##    950  0.9981255  0.2375846
##   1000  0.9980561  0.1972965
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
Adult_TDA_PC_5.50.5_n5_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.9983340 0.3328702    Fold1
## 2 0.9981250 0.0000000    Fold3
## 3 0.9981258 0.3070263    Fold2
# Keep only the per-fold accuracy column (a one-column data frame, one row
# per CV fold) for the fold-wise comparison further down.
# NOTE(review): rows follow the order stored in $resample (Fold1, Fold3,
# Fold2 here); confirm the baseline resamples use the same order.
ad_tda_pc_5.50.5_n5_rf_fit0_re <-
  Adult_TDA_PC_5.50.5_n5_RfFit0$resample["Accuracy"]


summary(Adult_TDA_PC_5.50.5_n5_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       14404  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           28808  matrix     numeric  
## oob.times       14404  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               14404  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot showing the 25 top-ranked predictors of the fitted
# random forest. The feature count is passed by name rather than by position,
# and the plot title spells "Assisted" correctly.
vip(Adult_TDA_PC_5.50.5_n5_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n5_RfFit TDA-Assisted RF")

# Score the held-out test data with the model fitted above.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n5_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the reference labels in adult_df1
# to assess performance on the test data, then print the result.
ad_tda_pc_5.50.5_n5_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7413  2100
##      >50K       3   252
##                                           
##                Accuracy : 0.7847          
##                  95% CI : (0.7764, 0.7928)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1.314e-09       
##                                           
##                   Kappa : 0.1534          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9996          
##             Specificity : 0.1071          
##          Pos Pred Value : 0.7792          
##          Neg Pred Value : 0.9882          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7589          
##    Detection Prevalence : 0.9739          
##       Balanced Accuracy : 0.5534          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n5_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7413  2100
##      >50K       3   252
##                                           
##                Accuracy : 0.7847          
##                  95% CI : (0.7764, 0.7928)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1.314e-09       
##                                           
##                   Kappa : 0.1534          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9996          
##             Specificity : 0.1071          
##          Pos Pred Value : 0.7792          
##          Neg Pred Value : 0.9882          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7589          
##    Detection Prevalence : 0.9739          
##       Balanced Accuracy : 0.5534          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n5_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.847052e-01   1.534495e-01   7.764186e-01   7.928216e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.313670e-09   0.000000e+00
# Overall test-set accuracy, kept as a named length-one numeric.
ad_tda_pc_5.50.5_n5_rf_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n5_rf_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n5_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9995955            0.1071429            0.7792494 
##       Neg Pred Value            Precision               Recall 
##            0.9882353            0.7792494            0.9995955 
##                   F1           Prevalence       Detection Rate 
##            0.8757753            0.7592138            0.7589066 
## Detection Prevalence    Balanced Accuracy 
##            0.9738943            0.5533692
# Precision, recall and F1 (elements 5 to 7 of byClass), selected by name.
# They are reported for the 'Positive' class of the confusion matrix.
ad_tda_pc_5.50.5_n5_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n5_rf_cf0$byClass[c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF

### Differences across the 3 CV folds

# Row-wise difference of per-fold accuracies (first operand minus the
# TDA-assisted model); a negative value means the TDA-assisted fold accuracy
# was the larger of the two.
diff_tda_pca_5.50.5_rf_n5_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.50.5_n5_rf_fit0_re
diff_tda_pca_5.50.5_rf_n5_3_fold
##     Accuracy
## 1 -0.1415485
## 2 -0.1396096
## 3 -0.1393472
## Bayesian tests on the 3-fold differences

# Bayesian sign test; -0.01 and 0.01 are presumably the lower and upper
# bounds of the region of practical equivalence (ROPE) -- confirm against
# the definition of BayesianSignTest.
bst_tda_pca_5.50.5_rf.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# The ratio is Inf when probRight is 0 and NaN when both entries are 0.
bst_tda_pca_5.50.5_rf.n5_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_rf.n5_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same fold differences, called with the
# same -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.50.5_rf.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n5_3_fold
## $winLeft
## [1] 0.9914
## 
## $winRope
## [1] 0.0086
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold differences.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds, followed by the two ROPE bounds -- confirm
# against the definition of correlatedBayesianTtest.
bct_tda_pca_5.50.5_rf.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n5_3_fold
## $left
## [1] 0.999981
## 
## $rope
## [1] 4.713479e-06
## 
## $right
## [1] 1.424516e-05
# ROPE plot of the fold differences for the interval [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.50.5_rf_n5_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.50.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold))
#bf_tda_pca_5.50.5_rf.n5_3_fold

# Frequentist check: one-sample t-test of the fold differences against 0
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold)
## t = -201.92, df = 2, p-value = 2.453e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1431552 -0.1371816
## sample estimates:
##  mean of x 
## -0.1401684
### Difference on the test set

# Single accuracy difference on the test data: rf_cf_ov_acc minus the
# TDA-assisted model's overall accuracy.
diff_tda_pca_5.50.5_rf.n5_test <-
  rf_cf_ov_acc - ad_tda_pc_5.50.5_n5_rf_cf0_ov_acc
diff_tda_pca_5.50.5_rf.n5_test
##   Accuracy 
## 0.07391482
## Bayesian tests on the test-set difference

# Bayesian sign test; the input here is a single difference (1 x 1 matrix).
bst_tda_pca_5.50.5_rf.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" for the test-set sign test
# (Inf when probRight is 0, NaN when both entries are 0).
bst_tda_pca_5.50.5_rf.n5_test_odds.left <- with(
  bst_tda_pca_5.50.5_rf.n5_test,
  probLeft / probRight
)
bst_tda_pca_5.50.5_rf.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference
bsr_tda_pca_5.50.5_rf.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.157
## 
## $winRight
## [1] 0.843
# Bayesian correlated t-test on the single test-set difference.
# NOTE(review): every component of the result is NA, presumably because a
# spread cannot be estimated from one observation -- confirm in the
# definition of correlatedBayesianTtest.
bct_tda_pca_5.50.5_rf.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n5_test))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n5_test)) #bf_tda_pca_5.50.5_rf.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n5_test))

## With TDA KDE filter: 5 intervals, 50% overlap, 5 bins
## Node 1

# Tune a random forest with caret on the node-1 data, centring and scaling
# the predictors and selecting mtry by cross-validated accuracy.
#
# `importance` must be spelled in lower case: randomForest() silently drops
# unknown arguments, so a capitalised `Importance` never switches on the
# permutation-importance computation. `preProcess` is spelled out in full
# instead of relying on partial argument matching.
#
# NOTE(review): rfGrid (defined elsewhere) goes up to mtry = 1000 while the
# model has 108 predictors, which is what triggers the repeated
# "invalid mtry: reset to within valid range" warnings; consider capping the
# grid at the number of predictors.
Adult_TDA_KDE_5.50.5_n1_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range

## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
Adult_TDA_KDE_5.50.5_n1_RfFit0
## Random Forest 
## 
## 13387 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8924, 8925, 8925 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8613578  0.6262153
##    100  0.8607601  0.6272384
##    150  0.8587431  0.6220310
##    200  0.8588180  0.6215913
##    250  0.8603865  0.6252692
##    300  0.8599383  0.6250645
##    350  0.8591165  0.6222775
##    400  0.8598638  0.6249253
##    450  0.8598637  0.6247009
##    500  0.8594154  0.6235399
##    550  0.8605360  0.6259093
##    600  0.8605360  0.6259602
##    650  0.8589673  0.6226026
##    700  0.8583697  0.6209440
##    750  0.8603120  0.6255933
##    800  0.8581457  0.6193261
##    850  0.8591913  0.6226179
##    900  0.8583698  0.6200272
##    950  0.8593408  0.6233352
##   1000  0.8603865  0.6254018
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
Adult_TDA_KDE_5.50.5_n1_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8648891 0.6383603    Fold1
## 2 0.8623935 0.6307766    Fold3
## 3 0.8567907 0.6095090    Fold2
# Keep only the per-fold accuracy column (a one-column data frame, one row
# per CV fold) for the fold-wise comparison further down.
# NOTE(review): rows follow the order stored in $resample (Fold1, Fold3,
# Fold2 here); confirm the baseline resamples use the same order.
ad_tda_kde_5.50.5_n1_rf_fit0_re <-
  Adult_TDA_KDE_5.50.5_n1_RfFit0$resample["Accuracy"]


summary(Adult_TDA_KDE_5.50.5_n1_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       13387  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           26774  matrix     numeric  
## oob.times       13387  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               13387  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot showing the 25 top-ranked predictors of the fitted
# random forest. The feature count is passed by name rather than by position,
# and the plot title spells "Assisted" correctly.
vip(Adult_TDA_KDE_5.50.5_n1_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.50.5_n1_RfFit TDA-Assisted RF")

# Score the held-out test data with the model fitted above.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n1_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the reference labels in adult_df1
# to assess performance on the test data, then print the result.
ad_tda_kde_5.50.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n1_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7126   534
##      >50K     290  1818
##                                         
##                Accuracy : 0.9156        
##                  95% CI : (0.91, 0.9211)
##     No Information Rate : 0.7592        
##     P-Value [Acc > NIR] : < 2.2e-16     
##                                         
##                   Kappa : 0.7608        
##                                         
##  Mcnemar's Test P-Value : < 2.2e-16     
##                                         
##             Sensitivity : 0.9609        
##             Specificity : 0.7730        
##          Pos Pred Value : 0.9303        
##          Neg Pred Value : 0.8624        
##              Prevalence : 0.7592        
##          Detection Rate : 0.7295        
##    Detection Prevalence : 0.7842        
##       Balanced Accuracy : 0.8669        
##                                         
##        'Positive' Class :  <=50K        
## 
ad_tda_kde_5.50.5_n1_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7126   534
##      >50K     290  1818
##                                         
##                Accuracy : 0.9156        
##                  95% CI : (0.91, 0.9211)
##     No Information Rate : 0.7592        
##     P-Value [Acc > NIR] : < 2.2e-16     
##                                         
##                   Kappa : 0.7608        
##                                         
##  Mcnemar's Test P-Value : < 2.2e-16     
##                                         
##             Sensitivity : 0.9609        
##             Specificity : 0.7730        
##          Pos Pred Value : 0.9303        
##          Neg Pred Value : 0.8624        
##              Prevalence : 0.7592        
##          Detection Rate : 0.7295        
##    Detection Prevalence : 0.7842        
##       Balanced Accuracy : 0.8669        
##                                         
##        'Positive' Class :  <=50K        
## 
ad_tda_kde_5.50.5_n1_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.156429e-01   7.608020e-01   9.099555e-01   9.210811e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   0.000000e+00   2.554808e-17
# Overall test-set accuracy, kept as a named length-one numeric.
ad_tda_kde_5.50.5_n1_rf_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n1_rf_cf0$overall["Accuracy"]
ad_tda_kde_5.50.5_n1_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9608954            0.7729592            0.9302872 
##       Neg Pred Value            Precision               Recall 
##            0.8624288            0.9302872            0.9608954 
##                   F1           Prevalence       Detection Rate 
##            0.9453436            0.7592138            0.7295250 
## Detection Prevalence    Balanced Accuracy 
##            0.7841933            0.8669273
# Precision, recall and F1 (elements 5 to 7 of byClass), selected by name.
# They are reported for the 'Positive' class of the confusion matrix.
ad_tda_kde_5.50.5_n1_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n1_rf_cf0$byClass[c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF

### Differences across the 3 CV folds

# Row-wise difference of per-fold accuracies (first operand minus the
# TDA-assisted model); a negative value means the TDA-assisted fold accuracy
# was the larger of the two.
diff_tda_kde_5.50.5_rf_n1_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n1_rf_fit0_re
diff_tda_kde_5.50.5_rf_n1_3_fold
##       Accuracy
## 1 -0.008103515
## 2 -0.003878147
## 3  0.001987949
## Bayesian tests on the 3-fold differences

# Bayesian sign test; -0.01 and 0.01 are presumably the lower and upper
# bounds of the region of practical equivalence (ROPE) -- confirm against
# the definition of BayesianSignTest.
bst_tda_kde_5.50.5_rf.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_rf.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# The ratio is Inf when probRight is 0 and NaN when both entries are 0.
bst_tda_kde_5.50.5_rf.n1_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_rf.n1_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.50.5_rf.n1_3_fold_odds.left
## [1] NaN
# Bayesian signed-rank test on the same fold differences, called with the
# same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.50.5_rf.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_rf.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold differences.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds, followed by the two ROPE bounds -- confirm
# against the definition of correlatedBayesianTtest.
bct_tda_kde_5.50.5_rf.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_rf.n1_3_fold
## $left
## [1] 0.09355665
## 
## $rope
## [1] 0.8771248
## 
## $right
## [1] 0.02931854
# ROPE plot of the fold differences for the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.50.5_rf_n1_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.50.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold))
#bf_tda_kde_5.50.5_rf.n1_3_fold

# Frequentist check: one-sample t-test of the fold differences against 0
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold)
## t = -1.1385, df = 2, p-value = 0.3729
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.015920632  0.009258157
## sample estimates:
##    mean of x 
## -0.003331237
### Difference on the test set

# Single accuracy difference on the test data: rf_cf_ov_acc minus the
# TDA-assisted model's overall accuracy.
diff_tda_kde_5.50.5_rf.n1_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n1_rf_cf0_ov_acc
diff_tda_kde_5.50.5_rf.n1_test
##    Accuracy 
## -0.05702293
## Bayesian tests on the test-set difference

# Bayesian sign test; the input here is a single difference (1 x 1 matrix).
bst_tda_kde_5.50.5_rf.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_rf.n1_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" for the test-set sign test
# (Inf when probRight is 0, NaN when both entries are 0).
bst_tda_kde_5.50.5_rf.n1_test_odds.left <- with(
  bst_tda_kde_5.50.5_rf.n1_test,
  probLeft / probRight
)
bst_tda_kde_5.50.5_rf.n1_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the single test-set difference
bsr_tda_kde_5.50.5_rf.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_rf.n1_test
## $winLeft
## [1] 0.8407667
## 
## $winRope
## [1] 0.1592333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the single test-set difference.
# NOTE(review): every component of the result is NA, presumably because a
# spread cannot be estimated from one observation -- confirm in the
# definition of correlatedBayesianTtest.
bct_tda_kde_5.50.5_rf.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_rf.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n1_test))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n1_test)) #bf_tda_kde_5.50.5_rf.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n1_test))

##Node2

# Fit the node-2 TDA-assisted random forest with caret, tuning mtry over
# rfGrid under the resampling scheme in fitControl and selecting by Accuracy.
#
# NOTE(review): the rendered warnings ("invalid mtry: reset to within valid
# range") together with the 108 predictors reported for this fit indicate that
# rfGrid contains mtry values larger than the number of predictors; those grid
# points are all reset by randomForest, so consider capping rfGrid where it is
# defined.
Adult_TDA_KDE_5.50.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  # randomForest's argument is lowercase `importance`. The previous
  # `Importance = T` fell into `...` and was silently ignored, which is why the
  # fitted model reported importanceSD as NULL.
  importance = TRUE,
  method = 'rf',
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c('center', 'scale'),
  metric = 'Accuracy'
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
Adult_TDA_KDE_5.50.5_n2_RfFit0
## Random Forest 
## 
## 12638 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8425, 8425, 8426 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8440416  0.5990016
##    100  0.8415096  0.5932256
##    150  0.8411932  0.5934107
##    200  0.8433295  0.5982069
##    250  0.8432504  0.5982755
##    300  0.8423008  0.5957367
##    350  0.8417470  0.5947305
##    400  0.8430922  0.5978009
##    450  0.8426174  0.5965623
##    500  0.8409557  0.5928548
##    550  0.8413516  0.5928332
##    600  0.8434878  0.5977601
##    650  0.8423008  0.5954840
##    700  0.8418261  0.5946200
##    750  0.8413514  0.5935380
##    800  0.8414303  0.5934740
##    850  0.8432504  0.5981819
##    900  0.8416677  0.5943368
##    950  0.8441208  0.6002420
##   1000  0.8419052  0.5948442
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 950.
Adult_TDA_KDE_5.50.5_n2_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8469024 0.6054938    Fold1
## 2 0.8428300 0.5938551    Fold3
## 3 0.8426300 0.6013771    Fold2
# Keep the per-fold Accuracy column (first column of $resample) as a
# one-column data frame.
# NOTE(review): rows follow caret's resample order (Fold1, Fold3, Fold2 in the
# rendered output), and the later subtraction pairs rows by position -- confirm
# the baseline resample object uses the same fold order.
ad_tda_KDE_5.50.5_n2_rf_fit0_re <-
  Adult_TDA_KDE_5.50.5_n2_RfFit0$resample["Accuracy"]

# Component overview of the fitted object.
summary(Adult_TDA_KDE_5.50.5_n2_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       12638  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           25276  matrix     numeric  
## oob.times       12638  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               12638  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot showing the top 25 predictors of the node-2
# TDA-assisted random forest (title typo "Assited" corrected).
vip(Adult_TDA_KDE_5.50.5_n2_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.50.5_n2_RfFit TDA-Assisted RF")

# Score the test data with the node-2 model fitted on the training data.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n2_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the observed test labels.
ad_tda_kde_5.50.5_n2_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_kde_5.50.5_n2_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7116   554
##      >50K     300  1798
##                                           
##                Accuracy : 0.9126          
##                  95% CI : (0.9068, 0.9181)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7517          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9595          
##             Specificity : 0.7645          
##          Pos Pred Value : 0.9278          
##          Neg Pred Value : 0.8570          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7285          
##    Detection Prevalence : 0.7852          
##       Balanced Accuracy : 0.8620          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n2_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7116   554
##      >50K     300  1798
##                                           
##                Accuracy : 0.9126          
##                  95% CI : (0.9068, 0.9181)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7517          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9595          
##             Specificity : 0.7645          
##          Pos Pred Value : 0.9278          
##          Neg Pred Value : 0.8570          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7285          
##    Detection Prevalence : 0.7852          
##       Balanced Accuracy : 0.8620          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n2_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.125717e-01   7.517197e-01   9.067952e-01   9.181007e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   0.000000e+00   4.823006e-18
# Store the overall test accuracy: the first element of $overall, which is
# named "Accuracy".
ad_tda_kde_5.50.5_n2_rf_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n2_rf_cf0$overall["Accuracy"]

# Show the per-class statistics of the confusion matrix.
ad_tda_kde_5.50.5_n2_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9595469            0.7644558            0.9277705 
##       Neg Pred Value            Precision               Recall 
##            0.8570067            0.9277705            0.9595469 
##                   F1           Prevalence       Detection Rate 
##            0.9433912            0.7592138            0.7285012 
## Detection Prevalence    Balanced Accuracy 
##            0.7852170            0.8620014
# Keep Precision, Recall and F1 (elements 5 to 7 of $byClass).
ad_tda_kde_5.50.5_n2_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n2_rf_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Position-wise difference of the per-fold accuracies: ad_rf_fit_re minus the
# node-2 TDA-assisted RF resamples.
diff_tda_kde_5.50.5_rf_n2_3_fold <-
  ad_rf_fit_re - ad_tda_KDE_5.50.5_n2_rf_fit0_re
print(diff_tda_kde_5.50.5_rf_n2_3_fold)
##      Accuracy
## 1 0.009883128
## 2 0.015685389
## 3 0.016148671
## Bayesian Tests 3-fold diff

# Bayesian sign test on the node-2 3-fold accuracy differences.
# The trailing -0.01 / 0.01 appear to be the ROPE bounds (the result carries a
# probRope component) -- confirm against the BayesianSignTest definition.
bst_tda_kde_5.50.5_rf.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold),
  -0.01,
  0.01
)
print(bst_tda_kde_5.50.5_rf.n2_3_fold)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the 3-fold Bayesian sign test above.
bst_tda_kde_5.50.5_rf.n2_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_rf.n2_3_fold,
  probLeft / probRight
)
print(bst_tda_kde_5.50.5_rf.n2_3_fold_odds.left)
## [1] 0
# Bayesian signed-rank test on the node-2 3-fold accuracy differences, called
# with the same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.50.5_rf.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.50.5_rf.n2_3_fold)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.3033667
## 
## $winRight
## [1] 0.6966333
# Bayesian correlated t-test on the node-2 3-fold accuracy differences.
# The second argument (0.1) is presumably the correlation term and the last
# two the ROPE bounds -- confirm against the function definition.
bct_tda_kde_5.50.5_rf.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.50.5_rf.n2_3_fold)
## $left
## [1] 0.004673605
## 
## $rope
## [1] 0.112999
## 
## $right
## [1] 0.8823274
# ROPE plot of the node-2 3-fold accuracy differences; the interval
# [-0.01, 0.01] is passed as the ROPE range.
plot(
  rope(diff_tda_kde_5.50.5_rf_n2_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (left disabled by the author):
# bf_tda_kde_5.50.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold))
# bf_tda_kde_5.50.5_rf.n2_3_fold

# One-sample t-test of the same per-fold differences against a mean of 0.
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold)
## t = 6.8986, df = 2, p-value = 0.02037
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.005232692 0.022578767
## sample estimates:
##  mean of x 
## 0.01390573
### Test-set difference
# rf_cf_ov_acc (presumably the non-TDA RF test accuracy -- confirm where it is
# defined) minus the node-2 TDA-assisted RF test accuracy.
diff_tda_kde_5.50.5_rf.n2_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n2_rf_cf0_ov_acc
print(diff_tda_kde_5.50.5_rf.n2_test)
##    Accuracy 
## -0.05395168
## Bayesian Tests Test set diff

# Bayesian sign test on the node-2 test-set difference.
# The trailing -0.01 / 0.01 appear to be the ROPE bounds (the result carries a
# probRope component) -- confirm against the BayesianSignTest definition.
bst_tda_kde_5.50.5_rf.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf.n2_test),
  -0.01,
  0.01
)
print(bst_tda_kde_5.50.5_rf.n2_test)
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the test-set Bayesian sign test above.
# The ratio is Inf whenever probRight is 0, as in the rendered output.
bst_tda_kde_5.50.5_rf.n2_test_odds.left <- with(
  bst_tda_kde_5.50.5_rf.n2_test,
  probLeft / probRight
)
print(bst_tda_kde_5.50.5_rf.n2_test_odds.left)
## [1] Inf
# Bayesian signed-rank test on the node-2 test-set difference, called with the
# same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.50.5_rf.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf.n2_test),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.50.5_rf.n2_test)
## $winLeft
## [1] 0.841
## 
## $winRope
## [1] 0.159
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the node-2 test-set difference.
# NOTE(review): the input is a single value and the rendered result is NA for
# left/rope/right -- presumably no variance can be estimated from one
# observation; confirm whether this test is meaningful here.
# The second argument (0.1) is presumably the correlation term -- confirm.
bct_tda_kde_5.50.5_rf.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf.n2_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.50.5_rf.n2_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n2_test))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n2_test)) #bf_tda_kde_5.50.5_rf.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n2_test))

##Node3

# Fit the node-3 TDA-assisted random forest with caret, tuning mtry over
# rfGrid under the resampling scheme in fitControl and selecting on Accuracy.
#
# Fix: the argument was written `Importance = T`. R argument names are
# case-sensitive, so it never matched randomForest's `importance` argument and
# was silently absorbed by `...`; variable importance was therefore not
# computed with the permutation measure. It is now spelled `importance`, and
# TRUE replaces the reassignable shorthand T. `preProc` is also spelled out as
# `preProcess` so the call no longer relies on partial argument matching.
#
# NOTE(review): the "invalid mtry" warnings emitted by this call suggest that
# rfGrid (defined elsewhere) requests mtry values above the number of
# predictors -- consider capping the grid. The zero-variance warnings could
# presumably be avoided by adding "zv" to preProcess; confirm before changing
# the feature set.
Adult_TDA_KDE_5.50.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
# Show the tuning summary of the node-3 fit.
print(Adult_TDA_KDE_5.50.5_n3_RfFit0)
## Random Forest 
## 
## 11634 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7755, 7756, 7757 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8341064  0.5700683
##    100  0.8305822  0.5610165
##    150  0.8307545  0.5623634
##    200  0.8306685  0.5623742
##    250  0.8320439  0.5661058
##    300  0.8308407  0.5626333
##    350  0.8304103  0.5614980
##    400  0.8299810  0.5610624
##    450  0.8295513  0.5595772
##    500  0.8307545  0.5619569
##    550  0.8321296  0.5653346
##    600  0.8311842  0.5632212
##    650  0.8296370  0.5592986
##    700  0.8323879  0.5669252
##    750  0.8312704  0.5637204
##    800  0.8312702  0.5629077
##    850  0.8313561  0.5640040
##    900  0.8317860  0.5661745
##    950  0.8322156  0.5662088
##   1000  0.8304966  0.5614328
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Show the per-fold Accuracy / Kappa of the selected model.
print(Adult_TDA_KDE_5.50.5_n3_RfFit0[["resample"]])
##    Accuracy     Kappa Resample
## 1 0.8339778 0.5695067    Fold1
## 2 0.8277018 0.5506721    Fold3
## 3 0.8406395 0.5900261    Fold2
# Keep only the per-fold Accuracy column (as a one-column data frame).
# NOTE(review): rows stay in the order stored in `resample`, which is not
# necessarily Fold1, Fold2, Fold3; the later subtraction against the baseline
# is positional -- verify both sides list the folds in the same order.
ad_tda_kde_5.50.5_n3_rf_fit0_re <-
  Adult_TDA_KDE_5.50.5_n3_RfFit0[["resample"]]["Accuracy"]


# Structural summary of the fitted model object.
print(summary(Adult_TDA_KDE_5.50.5_n3_RfFit0))
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       11634  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           23268  matrix     numeric  
## oob.times       11634  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               11634  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the top 25 predictors for the node-3 fit.
# Fix: the plot title misspelled "Assisted" as "Assited". The feature count is
# now passed by name (num_features) instead of positionally.
vip(Adult_TDA_KDE_5.50.5_n3_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.50.5_n3_RfFit TDA-Assisted RF")

# Score the hold-out set with the node-3 model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n3_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Compare the predictions against the true hold-out labels.
ad_tda_kde_5.50.5_n3_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_kde_5.50.5_n3_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7115   558
##      >50K     301  1794
##                                           
##                Accuracy : 0.9121          
##                  95% CI : (0.9063, 0.9176)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7502          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9594          
##             Specificity : 0.7628          
##          Pos Pred Value : 0.9273          
##          Neg Pred Value : 0.8563          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7284          
##    Detection Prevalence : 0.7855          
##       Balanced Accuracy : 0.8611          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second display of the same confusion matrix (repeats the print above it).
print(ad_tda_kde_5.50.5_n3_rf_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7115   558
##      >50K     301  1794
##                                           
##                Accuracy : 0.9121          
##                  95% CI : (0.9063, 0.9176)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7502          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9594          
##             Specificity : 0.7628          
##          Pos Pred Value : 0.9273          
##          Neg Pred Value : 0.8563          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7284          
##    Detection Prevalence : 0.7855          
##       Balanced Accuracy : 0.8611          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-3 confusion matrix.
print(ad_tda_kde_5.50.5_n3_rf_cf0[["overall"]])
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.120598e-01   7.501531e-01   9.062687e-01   9.176038e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   0.000000e+00   2.445041e-18
# Store the hold-out accuracy (first entry of `overall`, named "Accuracy").
ad_tda_kde_5.50.5_n3_rf_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n3_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the same confusion matrix.
print(ad_tda_kde_5.50.5_n3_rf_cf0[["byClass"]])
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9594121            0.7627551            0.9272775 
##       Neg Pred Value            Precision               Recall 
##            0.8563246            0.9272775            0.9594121 
##                   F1           Prevalence       Detection Rate 
##            0.9430711            0.7592138            0.7283989 
## Detection Prevalence    Balanced Accuracy 
##            0.7855242            0.8610836
# Store Precision, Recall and F1 (entries 5 to 7 of `byClass`).
ad_tda_kde_5.50.5_n3_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n3_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Per-fold accuracy difference: ad_rf_fit_re minus the node-3 TDA-assisted fit.
# NOTE(review): the subtraction pairs rows by position; confirm both data
# frames list the folds in the same order.
diff_tda_kde_5.50.5_rf_n3_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n3_rf_fit0_re
print(diff_tda_kde_5.50.5_rf_n3_3_fold)
##     Accuracy
## 1 0.02280774
## 2 0.03081357
## 3 0.01813912
## Bayesian Tests 3-fold diff

# Bayesian sign test on the node-3 per-fold differences, bounds -0.01 / 0.01.
bst_tda_kde_5.50.5_rf.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold),
  -0.01,
  0.01
)
print(bst_tda_kde_5.50.5_rf.n3_3_fold)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" versus "right" from the sign test (probLeft / probRight).
bst_tda_kde_5.50.5_rf.n3_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_rf.n3_3_fold,
  probLeft / probRight
)
print(bst_tda_kde_5.50.5_rf.n3_3_fold_odds.left)
## [1] 0
# Bayesian signed-rank test on the node-3 per-fold differences,
# using the same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.50.5_rf.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.50.5_rf.n3_3_fold)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0376
## 
## $winRight
## [1] 0.9624
# Correlated Bayesian t-test on the per-fold accuracy differences (node 3).
# Fix: the second argument (rho) is the correlation between cross-validation
# folds, which for k-fold CV is 1 / k. The resampling here is 3-fold, so rho
# must be 1 / 3; the previous value of 0.1 corresponds to 10-fold CV and
# understates the correlation. The last two arguments are the bounds
# -0.01 and 0.01 of the region of practical equivalence.

bct_tda_kde_5.50.5_rf.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_rf.n3_3_fold
## $left
## [1] 0.007751756
## 
## $rope
## [1] 0.03360722
## 
## $right
## [1] 0.958641
# ROPE plot of the node-3 per-fold differences over the range [-0.01, 0.01].
plot(
  rope(
    diff_tda_kde_5.50.5_rf_n3_3_fold,
    range = c(-0.01, 0.01)
  )
)

# Bayes factor (currently disabled)
# bf_tda_kde_5.50.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold))
# bf_tda_kde_5.50.5_rf.n3_3_fold

# Frequentist one-sample t-test of the same differences.
print(
  t.test(as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold))
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold)
## t = 6.4635, df = 2, p-value = 0.02311
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.007996749 0.039843539
## sample estimates:
##  mean of x 
## 0.02392014
### Test set diff
# Hold-out accuracy gap: rf_cf_ov_acc minus the node-3 TDA-assisted accuracy.
# NOTE(review): rf_cf_ov_acc is presumably the non-TDA baseline RF accuracy;
# it is defined elsewhere in the file -- confirm.
diff_tda_kde_5.50.5_rf.n3_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n3_rf_cf0_ov_acc
print(diff_tda_kde_5.50.5_rf.n3_test)
##   Accuracy 
## -0.0534398
## Bayesian Tests Test set diff

# Bayesian sign test on the node-3 hold-out difference, bounds -0.01 / 0.01.
# NOTE(review): the input is a single accuracy difference, so the result is
# driven mostly by the test's prior -- interpret with care.
bst_tda_kde_5.50.5_rf.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf.n3_test),
  -0.01,
  0.01
)
print(bst_tda_kde_5.50.5_rf.n3_test)
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test (probLeft / probRight).
# The ratio is Inf whenever probRight is exactly 0.
bst_tda_kde_5.50.5_rf.n3_test_odds.left <- with(
  bst_tda_kde_5.50.5_rf.n3_test,
  probLeft / probRight
)
print(bst_tda_kde_5.50.5_rf.n3_test_odds.left)
## [1] Inf
# Bayesian signed-rank test on the node-3 hold-out difference,
# using the same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.50.5_rf.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf.n3_test),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.50.5_rf.n3_test)
## $winLeft
## [1] 0.8400333
## 
## $winRope
## [1] 0.1599667
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test on the node-3 hold-out difference.
# NOTE(review): this call yields NA for left / rope / right; the input is a
# single value, for which a variance-based test is presumably undefined --
# confirm whether this step should be kept.
bct_tda_kde_5.50.5_rf.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf.n3_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.50.5_rf.n3_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n3_test))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n3_test)) #bf_tda_kde_5.50.5_rf.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n3_test))

##Node4

# Fit the node-4 TDA-assisted random forest with caret: the outcome
# adult_df1 is coerced to a factor and modelled on every other column of
# tda.m_kde_adult_5.50.5.n4.vec. fitControl and rfGrid are defined outside
# this part of the file.
#
# randomForest's argument is the lower-case `importance`; R argument names
# are case-sensitive, so the earlier `Importance = T` spelling was not
# matched (the recorded summary() shows importanceSD as NULL). It is now
# passed as `importance = TRUE`, using TRUE rather than the reassignable T.
#
# NOTE(review): the recorded run warns about zero-variance predictors and
# "invalid mtry: reset to within valid range"; the tuning table goes up to
# mtry = 1000 while the fit reports 108 predictors -- consider capping
# rfGrid and dropping zero-variance columns.
Adult_TDA_KDE_5.50.5_n4_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n4.vec,
  importance = TRUE,
  method = 'rf',
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c('center', 'scale'),
  metric = 'Accuracy'
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
Adult_TDA_KDE_5.50.5_n4_RfFit0
## Random Forest 
## 
## 10038 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6692, 6692, 6692 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8526599  0.5213723
##    100  0.8485754  0.5116948
##    150  0.8487747  0.5125076
##    200  0.8479777  0.5119375
##    250  0.8483762  0.5112452
##    300  0.8483762  0.5119849
##    350  0.8490735  0.5144138
##    400  0.8476788  0.5090453
##    450  0.8487747  0.5130535
##    500  0.8488743  0.5134342
##    550  0.8474796  0.5094925
##    600  0.8480773  0.5110583
##    650  0.8478781  0.5097130
##    700  0.8488743  0.5147859
##    750  0.8492728  0.5148479
##    800  0.8485754  0.5126250
##    850  0.8490735  0.5155813
##    900  0.8487747  0.5127515
##    950  0.8479777  0.5108229
##   1000  0.8469815  0.5086830
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy / Kappa of the fitted node-4 model.
Adult_TDA_KDE_5.50.5_n4_RfFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8631201 0.5534715    Fold1
## 2 0.8520622 0.5137710    Fold3
## 3 0.8427974 0.4968743    Fold2
# Keep only the Accuracy column, as a one-column data frame.
# NOTE(review): the rows above are ordered Fold1, Fold3, Fold2, and the
# 3-fold difference against ad_rf_fit_re is taken row by row -- confirm both
# fits list the same folds in the same order.
ad_tda_kde_5.50.5_n4_rf_fit0_re <-
  Adult_TDA_KDE_5.50.5_n4_RfFit0[["resample"]]["Accuracy"]


summary(Adult_TDA_KDE_5.50.5_n4_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted       10038  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           20076  matrix     numeric  
## oob.times       10038  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               10038  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot for the node-4 TDA-assisted forest, limited to 25
# features. The title previously read "TDA-Assited"; corrected to
# "TDA-Assisted".
vip(Adult_TDA_KDE_5.50.5_n4_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.50.5_n4_RfFit TDA-Assisted RF")

# Score the test data (adult.one_hot_df4Test) with the node-4 fit.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n4_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predictions against the observed adult_df1 labels of the
# test data and print the resulting statistics.
ad_tda_kde_5.50.5_n4_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n4_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6975   654
##      >50K     441  1698
##                                           
##                Accuracy : 0.8879          
##                  95% CI : (0.8815, 0.8941)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6836          
##                                           
##  Mcnemar's Test P-Value : 1.488e-10       
##                                           
##             Sensitivity : 0.9405          
##             Specificity : 0.7219          
##          Pos Pred Value : 0.9143          
##          Neg Pred Value : 0.7938          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7141          
##    Detection Prevalence : 0.7810          
##       Balanced Accuracy : 0.8312          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n4_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6975   654
##      >50K     441  1698
##                                           
##                Accuracy : 0.8879          
##                  95% CI : (0.8815, 0.8941)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6836          
##                                           
##  Mcnemar's Test P-Value : 1.488e-10       
##                                           
##             Sensitivity : 0.9405          
##             Specificity : 0.7219          
##          Pos Pred Value : 0.9143          
##          Neg Pred Value : 0.7938          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7141          
##    Detection Prevalence : 0.7810          
##       Balanced Accuracy : 0.8312          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-4 confusion matrix.
ad_tda_kde_5.50.5_n4_rf_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.878993e-01   6.836098e-01   8.814756e-01   8.940906e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  1.354808e-228   1.487834e-10
# Accuracy is the first element of $overall (see output above); selecting it
# by name keeps the same named length-1 vector.
ad_tda_kde_5.50.5_n4_rf_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n4_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the node-4 confusion matrix.
ad_tda_kde_5.50.5_n4_rf_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9405340            0.7219388            0.9142745 
##       Neg Pred Value            Precision               Recall 
##            0.7938289            0.9142745            0.9405340 
##                   F1           Prevalence       Detection Rate 
##            0.9272183            0.7592138            0.7140663 
## Detection Prevalence    Balanced Accuracy 
##            0.7810197            0.8312364
# Elements 5 to 7 of $byClass are Precision, Recall and F1 (see output above).
ad_tda_kde_5.50.5_n4_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n4_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff
# Row-wise difference of the per-fold accuracies: ad_rf_fit_re minus the
# node-4 TDA-assisted fit (one-column data frame, three rows).

diff_tda_kde_5.50.5_rf_n4_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n4_rf_fit0_re
diff_tda_kde_5.50.5_rf_n4_3_fold
##       Accuracy
## 1 -0.006334570
## 2  0.006453235
## 3  0.015981256
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# -0.01 / 0.01 are presumably the ROPE bounds -- confirm against the
# BayesianSignTest definition.

bst_tda_kde_5.50.5_rf.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_rf.n4_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test 
# Ratio of the "left" to the "right" probability of the sign test.

bst_tda_kde_5.50.5_rf.n4_3_fold_odds.left <-
  bst_tda_kde_5.50.5_rf.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_rf.n4_3_fold[["probRight"]]
bst_tda_kde_5.50.5_rf.n4_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_rf.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_rf.n4_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.8443
## 
## $winRight
## [1] 0.1557
# Bayesian Correlated Test
# 0.1 is presumably the assumed correlation -- confirm against the
# correlatedBayesianTtest definition.

bct_tda_kde_5.50.5_rf.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_rf.n4_3_fold
## $left
## [1] 0.08788593
## 
## $rope
## [1] 0.6130554
## 
## $right
## [1] 0.2990587
# Rope Plot
# ROPE plot of the three per-fold accuracy differences over [-0.01, 0.01].
plot(rope(diff_tda_kde_5.50.5_rf_n4_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold))
#bf_tda_kde_5.50.5_rf.n4_3_fold

#t_test
# One-sample t-test on the 3-fold differences. This step sat in the middle of
# the test-set section, between the "Odds Left" comment and its code; it is
# kept with the rest of the 3-fold analysis here.
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold)
## t = 0.83012, df = 2, p-value = 0.4938
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.02244955  0.03318283
## sample estimates:
##  mean of x 
## 0.00536664

### Test set diff
# Test-set accuracy of rf_cf_ov_acc minus the node-4 TDA-assisted RF
# accuracy; a single named value.
diff_tda_kde_5.50.5_rf.n4_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n4_rf_cf0_ov_acc
diff_tda_kde_5.50.5_rf.n4_test
##    Accuracy 
## -0.02927928
## Bayesian Tests Test set diff
# NOTE(review): the tests in this section receive a 1 x 1 matrix (one
# test-set difference); confirm they are meaningful for a single observation.

# Bayesian Sign Test

bst_tda_kde_5.50.5_rf.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_rf.n4_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# probRight is 0 in the recorded run, so the ratio prints as Inf.

bst_tda_kde_5.50.5_rf.n4_test_odds.left <-
  bst_tda_kde_5.50.5_rf.n4_test$probLeft /
  bst_tda_kde_5.50.5_rf.n4_test$probRight
bst_tda_kde_5.50.5_rf.n4_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_rf.n4_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf.n4_test),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n4_test
## $winLeft
## [1] 0.8431333
## 
## $winRope
## [1] 0.1568667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_kde_5.50.5_rf.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf.n4_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n4_test))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n4_test)) #bf_tda_kde_5.50.5_rf.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n4_test))

##Node5

# Fit the node-5 TDA-assisted random forest with caret: the outcome is
# `adult_df1` (as a factor), all other columns of the node-5 data are
# predictors, which are centred and scaled; `mtry` is tuned over `rfGrid`
# using the resampling scheme in `fitControl`, selecting on Accuracy.
#
# `importance = TRUE` is forwarded to randomForest(). It was previously spelled
# `Importance = T`; R argument names are case-sensitive, and the fitted model's
# summary (importanceSD of length 0) suggests that spelling was never applied.
#
# NOTE(review): the fit reports 108 predictors while the tuning results list
# mtry values up to 1000, and randomForest warns "invalid mtry: reset to within
# valid range"; most grid points presumably collapse to the same model, so
# consider capping `rfGrid` at the number of predictors.
# NOTE(review): preProcess also warns about zero-variance columns in every
# resample; consider dropping them (e.g. the "zv" method) -- confirm the effect.
Adult_TDA_KDE_5.50.5_n5_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n5.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
# Show the tuning results of the node-5 TDA-assisted RF.
Adult_TDA_KDE_5.50.5_n5_RfFit0
## Random Forest 
## 
## 7540 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 5028, 5026, 5026 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     50  0.8718853  0.4208062
##    100  0.8687033  0.4155007
##    150  0.8696315  0.4182978
##    200  0.8684376  0.4132666
##    250  0.8684380  0.4146251
##    300  0.8675096  0.4093738
##    350  0.8698968  0.4190122
##    400  0.8677754  0.4096011
##    450  0.8689686  0.4181539
##    500  0.8692338  0.4176560
##    550  0.8684385  0.4172817
##    600  0.8688363  0.4166412
##    650  0.8687038  0.4139031
##    700  0.8688358  0.4175411
##    750  0.8689683  0.4146387
##    800  0.8688359  0.4171044
##    850  0.8685704  0.4129635
##    900  0.8685711  0.4155595
##    950  0.8677746  0.4109668
##   1000  0.8687034  0.4145042
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold performance of the selected model.
Adult_TDA_KDE_5.50.5_n5_RfFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8793790 0.4587947    Fold1
## 2 0.8663484 0.3978005    Fold3
## 3 0.8699284 0.4058232    Fold2
# Keep only the per-fold accuracies, as a one-column data frame.
# NOTE(review): rows are in the order Fold1, Fold3, Fold2 and the fold label is
# dropped here; if these values are later subtracted row-by-row from another
# fit's accuracies, verify that both use the same fold order.
ad_tda_kde_5.50.5_n5_rf_fit0_re <-
  Adult_TDA_KDE_5.50.5_n5_RfFit0[["resample"]]["Accuracy"]


# Structural summary of the fitted object (lengths/classes of its components).
summary(Adult_TDA_KDE_5.50.5_n5_RfFit0)
##                 Length Class      Mode     
## call                5  -none-     call     
## type                1  -none-     character
## predicted        7540  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           15080  matrix     numeric  
## oob.times        7540  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y                7540  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               1  -none-     list
# Variable-importance plot of the 25 highest-ranked predictors.
# The title previously read "TDA-Assited"; the spelling is corrected here.
vip(Adult_TDA_KDE_5.50.5_n5_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.50.5_n5_RfFit TDA-Assisted RF")

# Score the held-out test data with the node-5 TDA-assisted RF.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n5_RfFit0,
  newdata = adult.one_hot_df4Test
)

# Compare the predictions with the observed test labels; no `positive` class is
# given, so confusionMatrix() picks its default (reported in the printout).
ad_tda_kde_5.50.5_n5_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n5_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6656   743
##      >50K     760  1609
##                                           
##                Accuracy : 0.8461          
##                  95% CI : (0.8388, 0.8532)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5802          
##                                           
##  Mcnemar's Test P-Value : 0.6798          
##                                           
##             Sensitivity : 0.8975          
##             Specificity : 0.6841          
##          Pos Pred Value : 0.8996          
##          Neg Pred Value : 0.6792          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6814          
##    Detection Prevalence : 0.7575          
##       Balanced Accuracy : 0.7908          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): prints the same confusion matrix a second time; the object has
# not been modified since it was printed right after being created.
ad_tda_kde_5.50.5_n5_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6656   743
##      >50K     760  1609
##                                           
##                Accuracy : 0.8461          
##                  95% CI : (0.8388, 0.8532)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5802          
##                                           
##  Mcnemar's Test P-Value : 0.6798          
##                                           
##             Sensitivity : 0.8975          
##             Specificity : 0.6841          
##          Pos Pred Value : 0.8996          
##          Neg Pred Value : 0.6792          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6814          
##    Detection Prevalence : 0.7575          
##       Balanced Accuracy : 0.7908          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the TDA-assisted RF confusion matrix.
ad_tda_kde_5.50.5_n5_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.461302e-01   5.801857e-01   8.388205e-01   8.532329e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   4.564944e-99   6.798222e-01
# Keep the accuracy entry (the first element of $overall) for the test-set
# comparison further down.
ad_tda_kde_5.50.5_n5_rf_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n5_rf_cf0$overall["Accuracy"]

# Per-class statistics, reported for the positive class.
ad_tda_kde_5.50.5_n5_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8975189            0.6840986            0.8995810 
##       Neg Pred Value            Precision               Recall 
##            0.6791895            0.8995810            0.8975189 
##                   F1           Prevalence       Detection Rate 
##            0.8985488            0.7592138            0.6814087 
## Detection Prevalence    Balanced Accuracy 
##            0.7574734            0.7908088
# Keep precision, recall and F1 (elements 5 to 7 of $byClass).
ad_tda_kde_5.50.5_n5_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n5_rf_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Fold-wise accuracy difference, computed as ad_rf_fit_re minus the
# TDA-assisted resample accuracies; negative entries therefore mean the
# TDA-assisted fit scored higher on that fold.
# NOTE(review): the two fits were presumably resampled on different data sets
# (full training set vs. a Mapper node), so fold i of one is not the same
# partition as fold i of the other -- confirm a paired comparison is intended.
diff_tda_kde_5.50.5_rf_n5_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n5_rf_fit0_re
diff_tda_kde_5.50.5_rf_n5_3_fold
##      Accuracy
## 1 -0.02259341
## 2 -0.00783305
## 3 -0.01114978
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold-wise differences; -0.01 and 0.01 are
# presumably the lower/upper bounds of the region of practical equivalence
# (the result reports probLeft / probRope / probRight).
bst_tda_kde_5.50.5_rf.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_rf.n5_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft / probRight). The ratio is Inf when probRight is 0 and probLeft
# is positive, as in the output printed below.
bst_tda_kde_5.50.5_rf.n5_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_rf.n5_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.50.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same fold-wise differences, called with
# the same -0.01 / 0.01 bounds as the sign test
# (the result reports winLeft / winRope / winRight).
bsr_tda_kde_5.50.5_rf.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_rf.n5_3_fold
## $winLeft
## [1] 0.6054
## 
## $winRope
## [1] 0.3946
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold-wise differences
# (the result reports left / rope / right).
# NOTE(review): the second argument (0.1) is presumably the correlation rho
# between folds. The usual heuristic for k-fold cross-validation is
# rho = 1 / k, which would be 1/3 for the 3-fold setup used here -- confirm
# whether 0.1 is intended.
bct_tda_kde_5.50.5_rf.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_rf.n5_3_fold
## $left
## [1] 0.7336319
## 
## $rope
## [1] 0.2444821
## 
## $right
## [1] 0.02188601
# ROPE plot of the fold-wise differences for the range [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.50.5_rf_n5_3_fold, range = c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_kde_5.50.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold))
#bf_tda_kde_5.50.5_rf.n5_3_fold

# Classical one-sample t-test of the fold-wise differences against a mean of 0
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold)
## t = -3.0997, df = 2, p-value = 0.09022
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.033096130  0.005378641
## sample estimates:
##   mean of x 
## -0.01385874
### Test set diff
# Difference of the two test-set accuracies (rf_cf_ov_acc minus the
# TDA-assisted RF accuracy). This is a single number, so every test below is
# run on one observation only.
diff_tda_kde_5.50.5_rf.n5_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n5_rf_cf0_ov_acc
diff_tda_kde_5.50.5_rf.n5_test
##   Accuracy 
## 0.01248976
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference, called with the
# bounds -0.01 / 0.01.
bst_tda_kde_5.50.5_rf.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_rf.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the test-set Bayesian sign test
# (probLeft / probRight).
bst_tda_kde_5.50.5_rf.n5_test_odds.left <- with(
  bst_tda_kde_5.50.5_rf.n5_test,
  probLeft / probRight
)
bst_tda_kde_5.50.5_rf.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference, called with
# the bounds -0.01 / 0.01.
bsr_tda_kde_5.50.5_rf.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_rf.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4605333
## 
## $winRight
## [1] 0.5394667
# Bayesian correlated t-test on the single test-set difference.
# NOTE(review): the printed result is NA for left, rope and right. The input
# holds only one observation, so presumably no spread can be estimated --
# confirm whether this call should be kept.
bct_tda_kde_5.50.5_rf.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_rf.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n5_test))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n5_test)) #bf_tda_kde_5.50.5_rf.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n5_test))

##Non-TDA-Assisted

# Tuning grid for the radial-basis SVM: three sigma values crossed with
# five cost values (0.25, 0.50, ..., 1.25), i.e. 15 candidate models.
svmGrid <- expand.grid(sigma = c(0.1, 1, 10), C = (1:5) * 0.25)

# Support Vector Machine - Radial Basis, fitted on the one-hot encoded
# training data with centering and scaling; the tuning parameters are
# selected by accuracy using the resampling scheme in `fitControl`.
# NOTE(review): `Importance` (capital I) is kept from the original call. It is
# presumably not a recognised argument of train() or of the SVM backend and
# may simply be ignored -- confirm and drop it if so.
# NOTE(review): the warnings printed below report a zero-variance predictor
# (V14.Holand.Netherlands) that cannot be scaled; consider removing it before
# pre-processing.
adultSvmFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted model: resampled accuracy/kappa for every sigma/C
# combination of the tuning grid and the finally selected values.
adultSvmFit
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15195, 15196, 15195 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa      
##    0.1   0.25  0.8050718  0.346413571
##    0.1   0.50  0.8152943  0.408134642
##    0.1   0.75  0.8202519  0.435200975
##    0.1   1.00  0.8212610  0.447051342
##    0.1   1.25  0.8204713  0.450881072
##    1.0   0.25  0.7766419  0.136421752
##    1.0   0.50  0.7860747  0.211872997
##    1.0   0.75  0.7915588  0.256547069
##    1.0   1.00  0.7943228  0.287135922
##    1.0   1.25  0.7967359  0.311243802
##   10.0   0.25  0.7592682  0.000741839
##   10.0   0.50  0.7607160  0.015579351
##   10.0   0.75  0.7630413  0.042435399
##   10.0   1.00  0.7641820  0.068288068
##   10.0   1.25  0.7645768  0.087308496
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
# Per-fold performance of the selected SVM model.
adultSvmFit$resample
##    Accuracy     Kappa Resample
## 1 0.8210055 0.4506814    Fold1
## 2 0.8221666 0.4438993    Fold2
## 3 0.8206107 0.4465733    Fold3
# Keep only the Accuracy column (first column) as a one-column data frame.
ad_svm_fit_re <- adultSvmFit$resample["Accuracy"]

summary(adultSvmFit)
## Length  Class   Mode 
##      1   ksvm     S4
# Variable-importance plot (disabled)
#vip(adultSvmFit, 25) + ggtitle("non-TDA-Assited Svm")

# Apply the model fitted on the training data to the test data.
predictions <- predict(adultSvmFit, newdata = adult.one_hot_df4Test)

# Confusion matrix of the predictions against the observed test labels,
# used to assess model fit/performance on the test data.
svm_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
svm_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6949  1215
##      >50K     467  1137
##                                           
##                Accuracy : 0.8278          
##                  95% CI : (0.8202, 0.8352)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4717          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9370          
##             Specificity : 0.4834          
##          Pos Pred Value : 0.8512          
##          Neg Pred Value : 0.7089          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7114          
##    Detection Prevalence : 0.8358          
##       Balanced Accuracy : 0.7102          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the non-TDA SVM confusion matrix.
svm_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.278051e-01   4.716603e-01   8.201693e-01   8.352449e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   4.641899e-61   3.989568e-74
# Keep the accuracy entry (the first element of $overall).
svm_cf_ov_acc <- svm_cf$overall["Accuracy"]

# Per-class statistics, reported for the positive class.
svm_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9370280            0.4834184            0.8511759 
##       Neg Pred Value            Precision               Recall 
##            0.7088529            0.8511759            0.9370280 
##                   F1           Prevalence       Detection Rate 
##            0.8920411            0.7592138            0.7114046 
## Detection Prevalence    Balanced Accuracy 
##            0.8357903            0.7102232
# Keep precision, recall and F1 (elements 5 to 7 of $byClass).
svm_cf_pr_rec_f1 <- svm_cf$byClass[c("Precision", "Recall", "F1")]

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node1

# Radial-basis SVM fitted on a TDA-derived subset of the training data, using
# the same tuning grid, resampling control and pre-processing as the
# non-TDA SVM.
# NOTE(review): the heading and the object name refer to node 1 ("n1"), but the
# training data is `tda.m_adult_5.50.5.n2.vec` -- confirm the intended node.
# NOTE(review): `Importance` (capital I) is kept from the original call. It is
# presumably not a recognised argument of train() or of the SVM backend and
# may simply be ignored -- confirm and drop it if so.
Adult_TDA_PC_5.50.5_n1_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted model: resampled accuracy/kappa for every sigma/C
# combination of the tuning grid and the finally selected values.
Adult_TDA_PC_5.50.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8137, 8137, 8138 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa     
##    0.1   0.25  0.6762249  0.33461539
##    0.1   0.50  0.6908894  0.36962245
##    0.1   0.75  0.6957230  0.38197704
##    0.1   1.00  0.6970338  0.38582889
##    0.1   1.25  0.6956412  0.38396161
##    1.0   0.25  0.6146976  0.17941548
##    1.0   0.50  0.6441913  0.25304665
##    1.0   0.75  0.6551694  0.28371902
##    1.0   1.00  0.6572993  0.29270532
##    1.0   1.25  0.6601666  0.30140139
##   10.0   0.25  0.5572668  0.03234495
##   10.0   0.50  0.5749629  0.07892666
##   10.0   0.75  0.5908569  0.12199304
##   10.0   1.00  0.6009336  0.15083881
##   10.0   1.25  0.6054395  0.16481639
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
Adult_TDA_PC_5.50.5_n1_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7041042 0.3994792    Fold1
## 2 0.6950111 0.3822459    Fold2
## 3 0.6919862 0.3757616    Fold3
ad_tda_pc_5.50.5_n1_svm_fit_re<-Adult_TDA_PC_5.50.5_n1_SvmFit0 $resample[1]

summary(Adult_TDA_PC_5.50.5_n1_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_PC_5.50.5_n1_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.50.5_n1_SvmFit TDA-Assisted Svm")

# Score the held-out test frame with the node-1 TDA-assisted SVM.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n1_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted vs. observed classes on the test frame and print
# the caret summary.
# NOTE(review): the printed test accuracy (~0.36) is far below both the
# no-information rate (~0.76) and the cross-validated accuracy (~0.70);
# check that the test frame matches the training subset's columns/encoding.
ad_tda_pc_5.50.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n1_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1477   355
##      >50K    5939  1997
##                                           
##                Accuracy : 0.3557          
##                  95% CI : (0.3462, 0.3652)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0266          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.1992          
##             Specificity : 0.8491          
##          Pos Pred Value : 0.8062          
##          Neg Pred Value : 0.2516          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1512          
##    Detection Prevalence : 0.1876          
##       Balanced Accuracy : 0.5241          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n1_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1477   355
##      >50K    5939  1997
##                                           
##                Accuracy : 0.3557          
##                  95% CI : (0.3462, 0.3652)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0266          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.1992          
##             Specificity : 0.8491          
##          Pos Pred Value : 0.8062          
##          Neg Pred Value : 0.2516          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1512          
##    Detection Prevalence : 0.1876          
##       Balanced Accuracy : 0.5241          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n1_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.35565111     0.02663693     0.34615081     0.36523754     0.75921376 
## AccuracyPValue  McnemarPValue 
##     1.00000000     0.00000000
# Keep the overall test-set accuracy (a named length-1 vector), selected by
# name rather than by position.
ad_tda_pc_5.50.5_n1_svm_cf0_ov_acc <- ad_tda_pc_5.50.5_n1_svm_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n1_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.1991640            0.8490646            0.8062227 
##       Neg Pred Value            Precision               Recall 
##            0.2516381            0.8062227            0.1991640 
##                   F1           Prevalence       Detection Rate 
##            0.3194204            0.7592138            0.1512080 
## Detection Prevalence    Balanced Accuracy 
##            0.1875512            0.5241143
# Keep precision, recall and F1 for the positive class, selected by name
# (these are entries 5 to 7 of `byClass`).
ad_tda_pc_5.50.5_n1_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n1_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Row-wise gap in per-fold accuracy: `ad_svm_fit_re` (presumably the
# non-TDA SVM's folds) minus the node-1 TDA-assisted SVM's folds. Positive
# values mean the first model scored higher on that row.
diff_tda_pca_5.50.5_svm_n1_3_fold <- ad_svm_fit_re - ad_tda_pc_5.50.5_n1_svm_fit_re
diff_tda_pca_5.50.5_svm_n1_3_fold
##    Accuracy
## 1 0.1169013
## 2 0.1271556
## 3 0.1286245
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# Runs the sign test on the per-fold accuracy differences (node 1).
# The trailing -0.01 / 0.01 are presumably the lower and upper bounds of the
# region of practical equivalence (ROPE); the result carries probLeft,
# probRope and probRight. Confirm against the helper's definition.

bst_tda_pca_5.50.5_svm.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left probability to its right probability.

bst_tda_pca_5.50.5_svm.n1_3_fold_odds.left <-
  bst_tda_pca_5.50.5_svm.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_svm.n1_3_fold[["probRight"]]
bst_tda_pca_5.50.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Same differences and (presumably) the same ROPE bounds as the sign test;
# the result carries winLeft, winRope and winRight.
# NOTE(review): the printed values look sampling-based -- if the helper draws
# random samples, consider calling set.seed() first for reproducibility.

bsr_tda_pca_5.50.5_svm.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0091
## 
## $winRight
## [1] 0.9909
# Bayesian Correlated Test
# Correlated t-test on the per-fold accuracy differences; the result carries
# left, rope and right.
# NOTE(review): the second argument (0.1) is presumably the correlation `rho`
# between folds. The usual heuristic is 1 / (number of folds), i.e. 1/3 for
# the 3-fold resampling reported for this model -- confirm against the
# helper's definition before relying on these probabilities.

bct_tda_pca_5.50.5_svm.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n1_3_fold
## $left
## [1] 0.0005023475
## 
## $rope
## [1] 0.0001909143
## 
## $right
## [1] 0.9993067
# Rope Plot
# Share of the per-fold differences that falls inside [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.50.5_svm_n1_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.50.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold))
#bf_tda_pca_5.50.5_rf.n1_3_fold

#t_test
# One-sample t-test of the per-fold differences against a mean of zero.
t.test(x = as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold)
## t = 33.69, df = 2, p-value = 0.0008799
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1083617 0.1400925
## sample estimates:
## mean of x 
## 0.1242271
### Test set diff
# Gap in overall test-set accuracy: `svm_cf_ov_acc` (presumably the non-TDA
# SVM) minus the node-1 TDA-assisted SVM. This is a single number.
diff_tda_pca_5.50.5_svm.n1_test <- svm_cf_ov_acc - ad_tda_pc_5.50.5_n1_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n1_test
## Accuracy 
## 0.472154
## Bayesian Tests Test set diff

# Bayesian Sign Test
# Applied to the test-set accuracy gap for node 1.
# NOTE(review): the input is a single value, so the reported probabilities
# rest on one observation plus whatever prior the helper applies; treat
# them as descriptive only.

bst_tda_pca_5.50.5_svm.n1_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm.n1_test),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left probability to its right probability.

bst_tda_pca_5.50.5_svm.n1_test_odds.left <-
  bst_tda_pca_5.50.5_svm.n1_test[["probLeft"]] /
  bst_tda_pca_5.50.5_svm.n1_test[["probRight"]]
bst_tda_pca_5.50.5_svm.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Applied to the single test-set accuracy gap for node 1; the result carries
# winLeft, winRope and winRight.
# NOTE(review): with one observation this mostly reflects the helper's
# prior/sampling -- interpret with care.

bsr_tda_pca_5.50.5_svm.n1_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm.n1_test),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1596
## 
## $winRight
## [1] 0.8404
# Bayesian Correlated Test
# Applied to the single test-set accuracy gap for node 1.
# NOTE(review): this prints NA for left, rope and right. The input has one
# value, so there is presumably no variance to estimate; the call is not
# informative here.

bct_tda_pca_5.50.5_svm.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n1_test))

#BayesFactor
#bf_tda_pca_5.50.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n1_test)) #bf_tda_pca_5.50.5_svm.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n1_test))

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node2

# Fit a radial-basis SVM (caret method "svmRadial") to the node-2 data
# (`tda.m_adult_5.50.5.n2.vec`), predicting `adult_df1` from every other
# column. Resampling comes from `fitControl` and the candidate sigma/C
# values from `svmGrid`; the best combination is chosen by accuracy.
# Predictors are centered and scaled before fitting.
# NOTE(review): caret warns below that some columns have zero variance in
# this subset and cannot be scaled; consider adding "zv" to `preProcess`,
# applied consistently to every fit being compared.
# NOTE(review): `Importance` is forwarded through `...`; it is unclear
# whether the svmRadial fitting function uses it -- confirm or drop.
Adult_TDA_PC_5.50.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.50.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8137, 8137, 8138 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa     
##    0.1   0.25  0.6778625  0.33756898
##    0.1   0.50  0.6918720  0.37103428
##    0.1   0.75  0.6952312  0.38015865
##    0.1   1.00  0.6962143  0.38371387
##    0.1   1.25  0.6976889  0.38760743
##    1.0   0.25  0.6143694  0.17845054
##    1.0   0.50  0.6498438  0.26512742
##    1.0   0.75  0.6572170  0.28795443
##    1.0   1.00  0.6627063  0.30388347
##    1.0   1.25  0.6648365  0.31051449
##   10.0   0.25  0.5559561  0.02963286
##   10.0   0.50  0.5729967  0.07417054
##   10.0   0.75  0.5874976  0.11367689
##   10.0   1.00  0.6002785  0.14891491
##   10.0   1.25  0.6036371  0.16034175
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
Adult_TDA_PC_5.50.5_n2_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7053330 0.4045356    Fold1
## 2 0.6885447 0.3686743    Fold3
## 3 0.6991890 0.3896124    Fold2
# Per-fold cross-validated accuracy of the node-2 TDA-assisted SVM, kept as a
# one-column data frame named `Accuracy`.
# `$resample` for this fit comes back as Fold1, Fold3, Fold2, and the paired
# differences computed later subtract row by row. Sort by fold label so that
# row i always means fold i.
# NOTE(review): this assumes `ad_svm_fit_re` is also in Fold1..FoldK order --
# confirm where it is built.
ad_tda_pc_5.50.5_n2_svm_fit_re <- with(
  Adult_TDA_PC_5.50.5_n2_SvmFit0$resample,
  data.frame(Accuracy = Accuracy[order(Resample)])
)

summary(Adult_TDA_PC_5.50.5_n2_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_PC_5.50.5_n2_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.50.5_n2_SvmFit TDA-Assisted Svm")

# Score the held-out test frame with the node-2 TDA-assisted SVM.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n2_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted vs. observed classes on the test frame and print
# the caret summary.
# NOTE(review): the printed test accuracy (~0.36) is far below both the
# no-information rate (~0.76) and the cross-validated accuracy (~0.70);
# check that the test frame matches the training subset's columns/encoding.
ad_tda_pc_5.50.5_n2_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n2_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1496   347
##      >50K    5920  2005
##                                          
##                Accuracy : 0.3584         
##                  95% CI : (0.3489, 0.368)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 0.03           
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.2017         
##             Specificity : 0.8525         
##          Pos Pred Value : 0.8117         
##          Neg Pred Value : 0.2530         
##              Prevalence : 0.7592         
##          Detection Rate : 0.1532         
##    Detection Prevalence : 0.1887         
##       Balanced Accuracy : 0.5271         
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_pc_5.50.5_n2_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1496   347
##      >50K    5920  2005
##                                          
##                Accuracy : 0.3584         
##                  95% CI : (0.3489, 0.368)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 0.03           
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.2017         
##             Specificity : 0.8525         
##          Pos Pred Value : 0.8117         
##          Neg Pred Value : 0.2530         
##              Prevalence : 0.7592         
##          Detection Rate : 0.1532         
##    Detection Prevalence : 0.1887         
##       Balanced Accuracy : 0.5271         
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_pc_5.50.5_n2_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.35841523     0.02995696     0.34889776     0.36801720     0.75921376 
## AccuracyPValue  McnemarPValue 
##     1.00000000     0.00000000
# Keep the overall test-set accuracy (a named length-1 vector), selected by
# name rather than by position.
ad_tda_pc_5.50.5_n2_svm_cf0_ov_acc <- ad_tda_pc_5.50.5_n2_svm_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n2_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.2017260            0.8524660            0.8117200 
##       Neg Pred Value            Precision               Recall 
##            0.2529968            0.8117200            0.2017260 
##                   F1           Prevalence       Detection Rate 
##            0.3231450            0.7592138            0.1531532 
## Detection Prevalence    Balanced Accuracy 
##            0.1886773            0.5270960
# Keep precision, recall and F1 for the positive class, selected by name
# (these are entries 5 to 7 of `byClass`).
ad_tda_pc_5.50.5_n2_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n2_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Row-wise gap in per-fold accuracy: `ad_svm_fit_re` (presumably the
# non-TDA SVM's folds) minus the node-2 TDA-assisted SVM's folds. Positive
# values mean the first model scored higher on that row.
diff_tda_pca_5.50.5_svm_n2_3_fold <- ad_svm_fit_re - ad_tda_pc_5.50.5_n2_svm_fit_re
diff_tda_pca_5.50.5_svm_n2_3_fold
##    Accuracy
## 1 0.1156725
## 2 0.1336219
## 3 0.1214217
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# Runs the sign test on the per-fold accuracy differences (node 2).
# The trailing -0.01 / 0.01 are presumably the lower and upper bounds of the
# region of practical equivalence (ROPE); the result carries probLeft,
# probRope and probRight. Confirm against the helper's definition.

bst_tda_pca_5.50.5_svm.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left probability to its right probability.

bst_tda_pca_5.50.5_svm.n2_3_fold_odds.left <-
  bst_tda_pca_5.50.5_svm.n2_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_svm.n2_3_fold[["probRight"]]
bst_tda_pca_5.50.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Same differences and (presumably) the same ROPE bounds as the sign test;
# the result carries winLeft, winRope and winRight.
# NOTE(review): the printed values look sampling-based -- if the helper draws
# random samples, consider calling set.seed() first for reproducibility.

bsr_tda_pca_5.50.5_svm.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009666667
## 
## $winRight
## [1] 0.9903333
# Bayesian Correlated Test
# Correlated t-test on the per-fold accuracy differences; the result carries
# left, rope and right.
# NOTE(review): the second argument (0.1) is presumably the correlation `rho`
# between folds. The usual heuristic is 1 / (number of folds), i.e. 1/3 for
# the 3-fold resampling reported for this model -- confirm against the
# helper's definition before relying on these probabilities.

bct_tda_pca_5.50.5_svm.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n2_3_fold
## $left
## [1] 0.00104314
## 
## $rope
## [1] 0.0003980152
## 
## $right
## [1] 0.9985588
# Rope Plot
# Share of the per-fold differences that falls inside [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.50.5_svm_n2_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.50.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold))
#bf_tda_pca_5.50.5_rf.n2_3_fold

#t_test
# One-sample t-test of the per-fold differences against a mean of zero.
t.test(x = as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold)
## t = 23.351, df = 2, p-value = 0.001829
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1008028 0.1463413
## sample estimates:
## mean of x 
##  0.123572
### Test set diff
# Gap in overall test-set accuracy: `svm_cf_ov_acc` (presumably the non-TDA
# SVM) minus the node-2 TDA-assisted SVM. This is a single number.
diff_tda_pca_5.50.5_svm.n2_test <- svm_cf_ov_acc - ad_tda_pc_5.50.5_n2_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n2_test
##  Accuracy 
## 0.4693898
## Bayesian Tests Test set diff

# Bayesian Sign Test
# Applied to the test-set accuracy gap for node 2.
# NOTE(review): the input is a single value, so the reported probabilities
# rest on one observation plus whatever prior the helper applies; treat
# them as descriptive only.

bst_tda_pca_5.50.5_svm.n2_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm.n2_test),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left probability to its right probability.

bst_tda_pca_5.50.5_svm.n2_test_odds.left <-
  bst_tda_pca_5.50.5_svm.n2_test[["probLeft"]] /
  bst_tda_pca_5.50.5_svm.n2_test[["probRight"]]
bst_tda_pca_5.50.5_svm.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Applied to the single test-set accuracy gap for node 2; the result carries
# winLeft, winRope and winRight.
# NOTE(review): with one observation this mostly reflects the helper's
# prior/sampling -- interpret with care.

bsr_tda_pca_5.50.5_svm.n2_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm.n2_test),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1589667
## 
## $winRight
## [1] 0.8410333
# Bayesian Correlated Test
# Applied to the single test-set accuracy gap for node 2.
# NOTE(review): this prints NA for left, rope and right. The input has one
# value, so there is presumably no variance to estimate; the call is not
# informative here.

bct_tda_pca_5.50.5_svm.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n2_test))

#BayesFactor
#bf_tda_pca_5.50.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n2_test)) #bf_tda_pca_5.50.5_svm.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n2_test))

##Node3

# Fit a radial-kernel SVM with train() on the node-3 data
# (tda.m_adult_5.50.5.n3.vec). The outcome adult_df1 is coerced to a factor and
# every other column is a predictor. Predictors are centred and scaled, the
# grid in svmGrid is tuned under the resampling scheme in fitControl, and the
# best grid point is selected by accuracy.
# NOTE(review): `Importance` is not a documented argument of train(); it is
# forwarded through `...` and is presumably ignored for svmRadial. Confirm and
# drop it if so.
Adult_TDA_PC_5.50.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  Importance = TRUE, # TRUE, not T: T is an ordinary variable and can be reassigned
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"), # full name, no partial matching of `preProc`
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.50.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 13240 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8827, 8827, 8826 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa        
##    0.1   0.25  0.7842145   0.1162425046
##    0.1   0.50  0.7935045   0.1905707959
##    0.1   0.75  0.7977341   0.2296776075
##    0.1   1.00  0.7996222   0.2553808768
##    0.1   1.25  0.7993957   0.2673367247
##    1.0   0.25  0.7744714   0.0237078435
##    1.0   0.50  0.7771147   0.0587266562
##    1.0   0.75  0.7795317   0.0906697532
##    1.0   1.00  0.7814952   0.1231345858
##    1.0   1.25  0.7815709   0.1460611257
##   10.0   0.25  0.7713746  -0.0001509876
##   10.0   0.50  0.7716012   0.0038777846
##   10.0   0.75  0.7717523   0.0123107426
##   10.0   1.00  0.7712991   0.0258898059
##   10.0   1.25  0.7693353   0.0297386428
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
Adult_TDA_PC_5.50.5_n3_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7996828 0.2554193    Fold1
## 2 0.7978699 0.2563524    Fold2
## 3 0.8013140 0.2543709    Fold3
# Keep the per-fold accuracies as a one-column data frame; Accuracy is the
# first column of $resample.
ad_tda_pc_5.50.5_n3_svm_fit_re <- Adult_TDA_PC_5.50.5_n3_SvmFit0$resample["Accuracy"]

# summary() of the fitted train object
summary(Adult_TDA_PC_5.50.5_n3_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_PC_5.50.5_n3_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.50.5_n3_SvmFit TDA-Assisted Svm")

# Score the test data (adult.one_hot_df4Test) with the node-3 SVM
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n3_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the observed test labels
ad_tda_pc_5.50.5_n3_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n3_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6741  1787
##      >50K     675   565
##                                           
##                Accuracy : 0.748           
##                  95% CI : (0.7392, 0.7565)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.9954          
##                                           
##                   Kappa : 0.1779          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.9090          
##             Specificity : 0.2402          
##          Pos Pred Value : 0.7905          
##          Neg Pred Value : 0.4556          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6901          
##    Detection Prevalence : 0.8731          
##       Balanced Accuracy : 0.5746          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion matrix a second time; the output repeats the listing above.
ad_tda_pc_5.50.5_n3_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6741  1787
##      >50K     675   565
##                                           
##                Accuracy : 0.748           
##                  95% CI : (0.7392, 0.7565)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.9954          
##                                           
##                   Kappa : 0.1779          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.9090          
##             Specificity : 0.2402          
##          Pos Pred Value : 0.7905          
##          Neg Pred Value : 0.4556          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6901          
##    Detection Prevalence : 0.8731          
##       Balanced Accuracy : 0.5746          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n3_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.479525e-01   1.779221e-01   7.392178e-01   7.565392e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   9.953835e-01  4.835896e-111
# Overall test accuracy: the first (named) element of $overall
ad_tda_pc_5.50.5_n3_svm_cf0_ov_acc <- ad_tda_pc_5.50.5_n3_svm_cf0$overall["Accuracy"]

# Per-class statistics
ad_tda_pc_5.50.5_n3_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9089806            0.2402211            0.7904550 
##       Neg Pred Value            Precision               Recall 
##            0.4556452            0.7904550            0.9089806 
##                   F1           Prevalence       Detection Rate 
##            0.8455845            0.7592138            0.6901106 
## Detection Prevalence    Balanced Accuracy 
##            0.8730549            0.5746008
# Precision, Recall and F1 (entries 5 to 7 of $byClass)
ad_tda_pc_5.50.5_n3_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n3_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Positional (row-by-row) difference: ad_svm_fit_re, defined earlier in the
# file, minus the node-3 per-fold accuracies.
diff_tda_pca_5.50.5_svm_n3_3_fold <- ad_svm_fit_re - ad_tda_pc_5.50.5_n3_svm_fit_re
diff_tda_pca_5.50.5_svm_n3_3_fold
##     Accuracy
## 1 0.02132277
## 2 0.02429671
## 3 0.01929669
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold-wise accuracy differences.
# -0.01 and 0.01 are passed positionally (presumably the ROPE bounds, matching
# the interval used for the ROPE plot; confirm against the function definition).
bst_tda_pca_5.50.5_svm.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds left: the sign test's probLeft divided by its probRight
bst_tda_pca_5.50.5_svm.n3_3_fold_odds.left <-
  bst_tda_pca_5.50.5_svm.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_svm.n3_3_fold[["probRight"]]
bst_tda_pca_5.50.5_svm.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the fold-wise accuracy differences, called with
# the same positional bounds (-0.01, 0.01) as the sign test.
bsr_tda_pca_5.50.5_svm.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0367
## 
## $winRight
## [1] 0.9633
# Bayesian correlated t-test on the fold-wise accuracy differences.
# Positional arguments: 0.1, then -0.01 and 0.01 (presumably a correlation
# followed by the ROPE bounds; confirm against the function definition).
bct_tda_pca_5.50.5_svm.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n3_3_fold
## $left
## [1] 0.001398239
## 
## $rope
## [1] 0.00866563
## 
## $right
## [1] 0.9899361
# ROPE plot of the fold-wise differences for the interval [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.50.5_svm_n3_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor t-test (disabled)
#bf_tda_pca_5.50.5_svm.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold))
#bf_tda_pca_5.50.5_svm.n3_3_fold

# One-sample t-test of the fold-wise differences (alternative: mean not equal to 0)
t.test(x = as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold)
## t = 14.903, df = 2, p-value = 0.004473
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.01539126 0.02788619
## sample estimates:
##  mean of x 
## 0.02163872
### Test set diff
# svm_cf_ov_acc (defined earlier in the file) minus the node-3 model's overall
# test accuracy. The result is a single named value.
diff_tda_pca_5.50.5_svm.n3_test <- svm_cf_ov_acc - ad_tda_pc_5.50.5_n3_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n3_test
##   Accuracy 
## 0.07985258
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference, called with the
# positional bounds -0.01 and 0.01.
bst_tda_pca_5.50.5_svm.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds left: the sign test's probLeft divided by its probRight
bst_tda_pca_5.50.5_svm.n3_test_odds.left <-
  bst_tda_pca_5.50.5_svm.n3_test[["probLeft"]] /
  bst_tda_pca_5.50.5_svm.n3_test[["probRight"]]
bst_tda_pca_5.50.5_svm.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference, called with the
# same positional bounds (-0.01, 0.01) as the sign test.
bsr_tda_pca_5.50.5_svm.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1608
## 
## $winRight
## [1] 0.8392
# Bayesian correlated t-test on the single test-set difference, with positional
# arguments 0.1, -0.01 and 0.01.
# The recorded result for this one-value input is NA for left, rope and right.
bct_tda_pca_5.50.5_svm.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot (disabled; presumably because the test-set difference is a single value)
#plot(rope(diff_tda_pca_5.50.5_svm.n3_test))

# BayesFactor (disabled)
#bf_tda_pca_5.50.5_svm.n3_test <- ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n3_test))
#bf_tda_pca_5.50.5_svm.n3_test

# t-test (disabled)
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n3_test))


##Node4

# Fit a radial-kernel SVM with train() on the node-4 data
# (tda.m_adult_5.50.5.n4.vec). The outcome adult_df1 is coerced to a factor and
# every other column is a predictor. Predictors are centred and scaled, the
# grid in svmGrid is tuned under the resampling scheme in fitControl, and the
# best grid point is selected by accuracy.
# NOTE(review): `Importance` is not a documented argument of train(); it is
# forwarded through `...` and is presumably ignored for svmRadial. Confirm and
# drop it if so.
Adult_TDA_PC_5.50.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  Importance = TRUE, # TRUE, not T: T is an ordinary variable and can be reassigned
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"), # full name, no partial matching of `preProc`
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.50.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 16700 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11134, 11133, 11133 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa      
##    0.1   0.25  0.9448503  0.009245734
##    0.1   0.50  0.9452695  0.046774504
##    0.1   0.75  0.9456287  0.080597130
##    0.1   1.00  0.9453293  0.100635769
##    0.1   1.25  0.9451497  0.118352999
##    1.0   0.25  0.9449701  0.011550314
##    1.0   0.50  0.9455089  0.045970741
##    1.0   0.75  0.9458084  0.072781270
##    1.0   1.00  0.9455688  0.080406156
##    1.0   1.25  0.9449101  0.089703945
##   10.0   0.25  0.9449102  0.000000000
##   10.0   0.50  0.9449701  0.005877392
##   10.0   0.75  0.9449701  0.013467304
##   10.0   1.00  0.9448503  0.016915957
##   10.0   1.25  0.9444311  0.021494283
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1 and C = 0.75.
Adult_TDA_PC_5.50.5_n4_SvmFit0$resample
##    Accuracy      Kappa Resample
## 1 0.9453827 0.06628466    Fold1
## 2 0.9457518 0.06762623    Fold3
## 3 0.9462906 0.08443292    Fold2
# Keep the per-fold accuracies as a one-column data frame; Accuracy is the
# first column of $resample.
# NOTE(review): the $resample listing above is in the order Fold1, Fold3, Fold2,
# and the later subtraction from ad_svm_fit_re is positional, not keyed on the
# fold label. Confirm that this row order is the intended pairing.
ad_tda_pc_5.50.5_n4_svm_fit_re <- Adult_TDA_PC_5.50.5_n4_SvmFit0$resample["Accuracy"]

# summary() of the fitted train object
summary(Adult_TDA_PC_5.50.5_n4_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_PC_5.50.5_n4_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.50.5_n4_SvmFit TDA-Assisted Svm")

# Score the test data (adult.one_hot_df4Test) with the node-4 SVM
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n4_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the observed test labels
ad_tda_pc_5.50.5_n4_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7411  2318
##      >50K       5    34
##                                           
##                Accuracy : 0.7622          
##                  95% CI : (0.7536, 0.7706)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.2504          
##                                           
##                   Kappa : 0.0207          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.99933         
##             Specificity : 0.01446         
##          Pos Pred Value : 0.76174         
##          Neg Pred Value : 0.87179         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75870         
##    Detection Prevalence : 0.99601         
##       Balanced Accuracy : 0.50689         
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion matrix a second time; the output repeats the listing above.
ad_tda_pc_5.50.5_n4_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7411  2318
##      >50K       5    34
##                                           
##                Accuracy : 0.7622          
##                  95% CI : (0.7536, 0.7706)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.2504          
##                                           
##                   Kappa : 0.0207          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.99933         
##             Specificity : 0.01446         
##          Pos Pred Value : 0.76174         
##          Neg Pred Value : 0.87179         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75870         
##    Detection Prevalence : 0.99601         
##       Balanced Accuracy : 0.50689         
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n4_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.76218264     0.02074795     0.75361100     0.77059767     0.75921376 
## AccuracyPValue  McnemarPValue 
##     0.25037066     0.00000000
# Overall test accuracy: the first (named) element of $overall
ad_tda_pc_5.50.5_n4_svm_cf0_ov_acc <- ad_tda_pc_5.50.5_n4_svm_cf0$overall["Accuracy"]

# Per-class statistics
ad_tda_pc_5.50.5_n4_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           0.99932578           0.01445578           0.76174324 
##       Neg Pred Value            Precision               Recall 
##           0.87179487           0.76174324           0.99932578 
##                   F1           Prevalence       Detection Rate 
##           0.86450860           0.75921376           0.75870188 
## Detection Prevalence    Balanced Accuracy 
##           0.99600737           0.50689078
# Precision, Recall and F1 (entries 5 to 7 of $byClass)
ad_tda_pc_5.50.5_n4_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n4_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers

### 3-fold diff

# Positional (row-by-row) difference: ad_svm_fit_re, defined earlier in the
# file, minus the node-4 per-fold accuracies.
diff_tda_pca_5.50.5_svm_n4_3_fold <- ad_svm_fit_re - ad_tda_pc_5.50.5_n4_svm_fit_re
diff_tda_pca_5.50.5_svm_n4_3_fold
##     Accuracy
## 1 -0.1243772
## 2 -0.1235851
## 3 -0.1256800
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold-wise accuracy differences.
# -0.01 and 0.01 are passed positionally (presumably the ROPE bounds, matching
# the interval used for the ROPE plot; confirm against the function definition).
bst_tda_pca_5.50.5_svm.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds left: the sign test's probLeft divided by its probRight.
# probRight is 0 in the recorded sign-test output, so the ratio evaluates to Inf.
bst_tda_pca_5.50.5_svm.n4_3_fold_odds.left <-
  bst_tda_pca_5.50.5_svm.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_svm.n4_3_fold[["probRight"]]
bst_tda_pca_5.50.5_svm.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the fold-wise accuracy differences, called with
# the same positional bounds (-0.01, 0.01) as the sign test.
bsr_tda_pca_5.50.5_svm.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n4_3_fold
## $winLeft
## [1] 0.9907
## 
## $winRope
## [1] 0.0093
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold-wise accuracy differences.
# Positional arguments: 0.1, then -0.01 and 0.01 (presumably a correlation
# followed by the ROPE bounds; confirm against the function definition).
bct_tda_pca_5.50.5_svm.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n4_3_fold
## $left
## [1] 0.9999811
## 
## $rope
## [1] 5.214165e-06
## 
## $right
## [1] 1.373363e-05
# ROPE plot of the fold-wise differences for the interval [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.50.5_svm_n4_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor t-test (disabled)
#bf_tda_pca_5.50.5_svm.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold))
#bf_tda_pca_5.50.5_svm.n4_3_fold

# One-sample t-test of the fold-wise differences (alternative: mean not equal to 0)
t.test(x = as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold)
## t = -203.94, df = 2, p-value = 2.404e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1271750 -0.1219198
## sample estimates:
##  mean of x 
## -0.1245474
### Test set diff
# svm_cf_ov_acc (defined earlier in the file) minus the node-4 model's overall
# test accuracy. The result is a single named value.
diff_tda_pca_5.50.5_svm.n4_test <- svm_cf_ov_acc - ad_tda_pc_5.50.5_n4_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n4_test
##   Accuracy 
## 0.06562244
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference, called with the
# positional bounds -0.01 and 0.01.
bst_tda_pca_5.50.5_svm.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds left: the sign test's probLeft divided by its probRight
bst_tda_pca_5.50.5_svm.n4_test_odds.left <-
  bst_tda_pca_5.50.5_svm.n4_test[["probLeft"]] /
  bst_tda_pca_5.50.5_svm.n4_test[["probRight"]]
bst_tda_pca_5.50.5_svm.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference, called with the
# same positional bounds (-0.01, 0.01) as the sign test.
bsr_tda_pca_5.50.5_svm.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1563333
## 
## $winRight
## [1] 0.8436667
# Bayesian correlated t-test on the single test-set difference, with positional
# arguments 0.1, -0.01 and 0.01.
# The recorded result for this one-value input is NA for left, rope and right.
bct_tda_pca_5.50.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n4_test))

#BayesFactor
#bf_tda_pca_5.50.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n4_test)) #bf_tda_pca_5.50.5_svm.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n4_test))

##Node5

# Fit an RBF-kernel SVM (caret method 'svmRadial') on the Node5 TDA-PCA
# training subset. Tuning runs over svmGrid with the resampling scheme in
# fitControl, selecting on Accuracy; predictors are centered and scaled.
# NOTE(review): the recorded warnings report zero-variance predictors
# (e.g. V8.Husband) that cannot be scaled; consider adding 'zv' to preProc
# after confirming the effect on test-set predictions.
# `Importance` is forwarded through train()'s `...`; TRUE is spelled out
# because the shorthand T is an ordinary, reassignable variable.
Adult_TDA_PC_5.50.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  Importance = TRUE,
  method = 'svmRadial',
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProc = c('center', 'scale'),
  metric = 'Accuracy'
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted model: resampled Accuracy/Kappa for each (sigma, C) pair.
Adult_TDA_PC_5.50.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 14404 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9603, 9602, 9603 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa
##    0.1   0.25  0.9979867  0    
##    0.1   0.50  0.9979867  0    
##    0.1   0.75  0.9979867  0    
##    0.1   1.00  0.9979867  0    
##    0.1   1.25  0.9979867  0    
##    1.0   0.25  0.9979867  0    
##    1.0   0.50  0.9979867  0    
##    1.0   0.75  0.9979867  0    
##    1.0   1.00  0.9979867  0    
##    1.0   1.25  0.9979867  0    
##   10.0   0.25  0.9979867  0    
##   10.0   0.50  0.9979867  0    
##   10.0   0.75  0.9979867  0    
##   10.0   1.00  0.9979867  0    
##   10.0   1.25  0.9979867  0    
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 10 and C = 0.25.
# Per-fold resampling results of the selected Node5 model
Adult_TDA_PC_5.50.5_n5_SvmFit0[["resample"]]
##    Accuracy Kappa Resample
## 1 0.9981254     0    Fold1
## 2 0.9979175     0    Fold2
## 3 0.9979171     0    Fold3
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison computed later.
ad_tda_pc_5.50.5_n5_svm_fit_re <-
  Adult_TDA_PC_5.50.5_n5_SvmFit0[["resample"]]["Accuracy"]

summary(Adult_TDA_PC_5.50.5_n5_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Variable-importance plot (disabled)
# vip(Adult_TDA_PC_5.50.5_n5_SvmFit0, 25) + ggtitle("Adult_TDA_PCA_5.50.5_n5_SvmFit TDA-Assisted Svm")

# Class predictions of the Node5 model for the held-out test set
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n5_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of those predictions against the test-set labels
ad_tda_pc_5.50.5_n5_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion-matrix object a second time; the recorded output
# matches the earlier print (every test case is predicted as ' <=50K').
ad_tda_pc_5.50.5_n5_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the Node5 confusion matrix
ad_tda_pc_5.50.5_n5_svm_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Store the overall Accuracy (first entry, name retained) for the
# test-set comparison.
ad_tda_pc_5.50.5_n5_svm_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n5_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics
ad_tda_pc_5.50.5_n5_svm_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision, Recall and F1 (entries 5 to 7 of the per-class statistics)
ad_tda_pc_5.50.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n5_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Bayesian tests of the non-TDA-assisted SVM vs. the TDA-assisted SVM

### 3-fold diff

# Fold-wise accuracy difference: ad_svm_fit_re minus the Node5 TDA-PCA SVM
diff_tda_pca_5.50.5_svm_n5_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.50.5_n5_svm_fit_re
diff_tda_pca_5.50.5_svm_n5_3_fold
##     Accuracy
## 1 -0.1771199
## 2 -0.1757509
## 3 -0.1773064
## Bayesian tests on the 3-fold differences

# Bayesian sign test with the bounds -0.01 and 0.01
bst_tda_pca_5.50.5_svm.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# The recorded value is Inf because probRight is 0 here.
bst_tda_pca_5.50.5_svm.n5_3_fold_odds.left <-
  bst_tda_pca_5.50.5_svm.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_svm.n5_3_fold[["probRight"]]
bst_tda_pca_5.50.5_svm.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test with the bounds -0.01 and 0.01
bsr_tda_pca_5.50.5_svm.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n5_3_fold
## $winLeft
## [1] 0.9903667
## 
## $winRope
## [1] 0.009633333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the Node5 3-fold accuracy differences,
# called with 0.1 followed by the interval bounds -0.01 and 0.01.
# NOTE(review): if the 0.1 argument is the fold-correlation term, 3-fold CV
# would usually imply 1/3 -- confirm against the function's definition.
bct_tda_pca_5.50.5_svm.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n5_3_fold
## $left
## [1] 0.9999942
## 
## $rope
## [1] 1.169259e-06
## 
## $right
## [1] 4.597945e-06
# ROPE plot of the Node5 3-fold differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.50.5_svm_n5_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
# bf_tda_pca_5.50.5_svm.n5_3_fold <- ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold))
# bf_tda_pca_5.50.5_svm.n5_3_fold

# One-sample t-test on the same fold differences
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold)
## t = -360.38, df = 2, p-value = 7.7e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1788357 -0.1746158
## sample estimates:
##  mean of x 
## -0.1767257
### Test-set difference: svm_cf_ov_acc minus the Node5 TDA-PCA SVM accuracy
diff_tda_pca_5.50.5_svm.n5_test <-
  svm_cf_ov_acc - ad_tda_pc_5.50.5_n5_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n5_test
##   Accuracy 
## 0.06859132
## Bayesian tests on the test-set difference

# Bayesian sign test with the bounds -0.01 and 0.01
bst_tda_pca_5.50.5_svm.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test
bst_tda_pca_5.50.5_svm.n5_test_odds.left <-
  bst_tda_pca_5.50.5_svm.n5_test[["probLeft"]] /
  bst_tda_pca_5.50.5_svm.n5_test[["probRight"]]
bst_tda_pca_5.50.5_svm.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test with the bounds -0.01 and 0.01
bsr_tda_pca_5.50.5_svm.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1572
## 
## $winRight
## [1] 0.8428
# Bayesian correlated t-test on the test-set difference.
# The input is a single accuracy difference; the recorded result is NA for
# left, rope and right.
bct_tda_pca_5.50.5_svm.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n5_test))

#BayesFactor
#bf_tda_pca_5.50.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n5_test)) #bf_tda_pca_5.50.5_svm.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n5_test))


##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node1


# Fit an RBF-kernel SVM (caret method 'svmRadial') on the Node1 TDA-KDE
# training subset. Tuning runs over svmGrid with the resampling scheme in
# fitControl, selecting on Accuracy; predictors are centered and scaled.
# NOTE(review): the recorded warnings report zero-variance predictors
# (V14.Holand.Netherlands, V14.Yugoslavia) that cannot be scaled; consider
# adding 'zv' to preProc after confirming the effect on predictions.
# `Importance` is forwarded through train()'s `...`; TRUE is spelled out
# because the shorthand T is an ordinary, reassignable variable.
Adult_TDA_KDE_5.50.5_n1_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  Importance = TRUE,
  method = 'svmRadial',
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProc = c('center', 'scale'),
  metric = 'Accuracy'
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted model: resampled Accuracy/Kappa for each (sigma, C) pair.
Adult_TDA_KDE_5.50.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 13387 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8925, 8924, 8925 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa     
##    0.1   0.25  0.8009261  0.35791608
##    0.1   0.50  0.8075741  0.40507324
##    0.1   0.75  0.8117572  0.43067412
##    0.1   1.00  0.8131019  0.44455011
##    0.1   1.25  0.8131766  0.45046802
##    1.0   0.25  0.7614847  0.12988378
##    1.0   0.50  0.7723909  0.19892292
##    1.0   0.75  0.7803838  0.24996496
##    1.0   1.00  0.7834465  0.28031014
##    1.0   1.25  0.7850900  0.30099110
##   10.0   0.25  0.7407933  0.00000000
##   10.0   0.50  0.7423619  0.01112966
##   10.0   0.75  0.7457233  0.03639418
##   10.0   1.00  0.7494582  0.06782569
##   10.0   1.25  0.7516991  0.08859092
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold resampling results of the selected KDE Node1 model
Adult_TDA_KDE_5.50.5_n1_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8081578 0.4391419    Fold1
## 2 0.8133124 0.4462817    Fold3
## 3 0.8180596 0.4659804    Fold2
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison computed later.
# NOTE(review): the rows above are listed as Fold1, Fold3, Fold2, and the
# later subtraction pairs rows by position -- confirm the baseline uses the
# same row order, or sort both by Resample first.
ad_tda_kde_5.50.5_n1_svm_fit_re <- Adult_TDA_KDE_5.50.5_n1_SvmFit0$resample[1]

# Summarise the KDE Node1 model fitted in this section (the call previously
# named the PCA Node1 model, Adult_TDA_PC_5.50.5_n1_SvmFit0).
summary(Adult_TDA_KDE_5.50.5_n1_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Variable-importance plot (disabled)
# vip(Adult_TDA_KDE_5.50.5_n1_SvmFit0, 25) + ggtitle("Adult_TDA_KDE_5.50.5_n1_SvmFit TDA-Assisted Svm")

# Class predictions of the KDE Node1 model for the held-out test set
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n1_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of those predictions against the test-set labels
ad_tda_kde_5.50.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n1_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6974   907
##      >50K     442  1445
##                                           
##                Accuracy : 0.8619          
##                  95% CI : (0.8549, 0.8687)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5949          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9404          
##             Specificity : 0.6144          
##          Pos Pred Value : 0.8849          
##          Neg Pred Value : 0.7658          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7140          
##    Detection Prevalence : 0.8068          
##       Balanced Accuracy : 0.7774          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion-matrix object a second time; the recorded output
# matches the earlier print.
ad_tda_kde_5.50.5_n1_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6974   907
##      >50K     442  1445
##                                           
##                Accuracy : 0.8619          
##                  95% CI : (0.8549, 0.8687)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5949          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9404          
##             Specificity : 0.6144          
##          Pos Pred Value : 0.8849          
##          Neg Pred Value : 0.7658          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7140          
##    Detection Prevalence : 0.8068          
##       Balanced Accuracy : 0.7774          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the KDE Node1 confusion matrix
ad_tda_kde_5.50.5_n1_svm_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.618960e-01   5.949281e-01   8.548950e-01   8.686804e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  1.765119e-140   1.385980e-36
# Store the overall Accuracy (first entry, name retained) for the
# test-set comparison.
ad_tda_kde_5.50.5_n1_svm_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n1_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics
ad_tda_kde_5.50.5_n1_svm_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9403991            0.6143707            0.8849131 
##       Neg Pred Value            Precision               Recall 
##            0.7657658            0.8849131            0.9403991 
##                   F1           Prevalence       Detection Rate 
##            0.9118128            0.7592138            0.7139640 
## Detection Prevalence    Balanced Accuracy 
##            0.8068182            0.7773849
# Precision, Recall and F1 (entries 5 to 7 of the per-class statistics)
ad_tda_kde_5.50.5_n1_svm_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n1_svm_cf0$byClass[5:7]

###### Bayesian tests of the non-TDA-assisted SVM vs. the TDA-assisted SVM

### 3-fold diff

# Fold-wise accuracy difference against the SVM baseline (ad_svm_fit_re),
# as in the PCA sections. This line previously subtracted from the
# random-forest baseline (ad_rf_fit_re), so it compared an RF with an SVM.
# The recorded output below, and every test derived from this difference,
# was produced with the RF baseline and must be regenerated.
diff_tda_kde_5.50.5_svm_n1_3_fold <- (ad_svm_fit_re - ad_tda_kde_5.50.5_n1_svm_fit_re)
diff_tda_kde_5.50.5_svm_n1_3_fold
##     Accuracy
## 1 0.04862780
## 2 0.04520298
## 3 0.04071902
## Bayesian tests on the 3-fold differences

# Bayesian sign test with the bounds -0.01 and 0.01
bst_tda_kde_5.50.5_svm.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" versus "right" from the Bayesian sign test
bst_tda_kde_5.50.5_svm.n1_3_fold_odds.left <-
  bst_tda_kde_5.50.5_svm.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n1_3_fold[["probRight"]]
bst_tda_kde_5.50.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test with the bounds -0.01 and 0.01
bsr_tda_kde_5.50.5_svm.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.008966667
## 
## $winRight
## [1] 0.9910333
# Bayesian correlated t-test on the KDE Node1 3-fold accuracy differences,
# called with 0.1 followed by the interval bounds -0.01 and 0.01.
# NOTE(review): if the 0.1 argument is the fold-correlation term, 3-fold CV
# would usually imply 1/3 -- confirm against the function's definition.
bct_tda_kde_5.50.5_svm.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n1_3_fold
## $left
## [1] 0.001157901
## 
## $rope
## [1] 0.001695744
## 
## $right
## [1] 0.9971464
# ROPE plot of the KDE Node1 3-fold differences over [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.50.5_svm_n1_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
# bf_tda_kde_5.50.5_svm.n1_3_fold <- ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold))
# bf_tda_kde_5.50.5_svm.n1_3_fold

# One-sample t-test on the same fold differences
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold)
## t = 19.586, df = 2, p-value = 0.002597
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.03499738 0.05470249
## sample estimates:
##  mean of x 
## 0.04484993
### Test set diff
# Test-set accuracy difference against the SVM baseline (svm_cf_ov_acc), as
# in the PCA sections. This line previously used the random-forest accuracy
# (rf_cf_ov_acc), so it compared an RF with an SVM. The recorded output
# below, and the tests derived from this value, must be regenerated.
diff_tda_kde_5.50.5_svm.n1_test <- (svm_cf_ov_acc - ad_tda_kde_5.50.5_n1_svm_cf0_ov_acc)
diff_tda_kde_5.50.5_svm.n1_test
##     Accuracy 
## -0.003276003
## Bayesian tests on the test-set difference

# Bayesian sign test with the bounds -0.01 and 0.01
bst_tda_kde_5.50.5_svm.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# The recorded value is NaN because probLeft and probRight are both 0 here.
bst_tda_kde_5.50.5_svm.n1_test_odds.left <-
  bst_tda_kde_5.50.5_svm.n1_test[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n1_test[["probRight"]]
bst_tda_kde_5.50.5_svm.n1_test_odds.left
## [1] NaN
# Bayesian signed-rank test with the bounds -0.01 and 0.01
bsr_tda_kde_5.50.5_svm.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the test-set difference.
# The input is a single accuracy difference; the recorded result is NA for
# left, rope and right.
bct_tda_kde_5.50.5_svm.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n1_test))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n1_test)) #bf_tda_kde_5.50.5_svm.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n1_test))


##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node2

# Fit a radial-kernel SVM (method "svmRadial") on the node-2 TDA subset,
# tuning over svmGrid with the resampling scheme in fitControl and selecting
# the model by accuracy. Predictors are centred and scaled first.
# Changes from the original call: `T` -> `TRUE` (T is an ordinary variable and
# can be reassigned) and the abbreviated `preProc` is spelled out as
# `preProcess` instead of relying on partial argument matching.
# NOTE(review): the knit log below reports zero-variance dummy columns that
# cannot be scaled; consider adding "zv" to preProcess if dropping them is OK.
# NOTE(review): `Importance` looks carried over from the random-forest calls;
# confirm the SVM backend actually uses it.
# NOTE(review): the section header says "PCA filter" while the object names
# say "kde" - confirm which filter produced this subset.
Adult_TDA_KDE_5.50.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted object: resampling setup plus accuracy/kappa for each (sigma, C) pair
Adult_TDA_KDE_5.50.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 12638 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8426, 8425, 8425 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa     
##    0.1   0.25  0.7921346  0.38885952
##    0.1   0.50  0.8017089  0.43780314
##    0.1   0.75  0.8059028  0.46187097
##    0.1   1.00  0.8082766  0.47566168
##    0.1   1.25  0.8081184  0.47980336
##    1.0   0.25  0.7486153  0.15700715
##    1.0   0.50  0.7627788  0.23937027
##    1.0   0.75  0.7718780  0.29084582
##    1.0   1.00  0.7765463  0.32252259
##    1.0   1.25  0.7785243  0.34292475
##   10.0   0.25  0.7208419  0.00000000
##   10.0   0.50  0.7249565  0.02425912
##   10.0   0.75  0.7285171  0.05252581
##   10.0   1.00  0.7314447  0.08145614
##   10.0   1.25  0.7342932  0.10447355
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
# Per-fold accuracy and kappa stored on the fitted object
Adult_TDA_KDE_5.50.5_n2_SvmFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8081671 0.4791654    Fold1
## 2 0.8075006 0.4720081    Fold2
## 3 0.8091621 0.4758116    Fold3
# Keep the per-fold accuracies as a one-column data frame (column "Accuracy")
ad_tda_kde_5.50.5_n2_svm_fit_re <-
  Adult_TDA_KDE_5.50.5_n2_SvmFit0[["resample"]]["Accuracy"]

summary(Adult_TDA_KDE_5.50.5_n2_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_KDE_5.50.5_n2_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.50.5_n2_SvmFit TDA-Assisted Svm")
# Score the test data frame adult.one_hot_df4Test with the node-2 SVM
pred0 <- predict(Adult_TDA_KDE_5.50.5_n2_SvmFit0, newdata = adult.one_hot_df4Test)

# Confusion matrix of the predictions against the observed test labels
ad_tda_kde_5.50.5_n2_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n2_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7038  1006
##      >50K     378  1346
##                                           
##                Accuracy : 0.8583          
##                  95% CI : (0.8512, 0.8652)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5736          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9490          
##             Specificity : 0.5723          
##          Pos Pred Value : 0.8749          
##          Neg Pred Value : 0.7807          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7205          
##    Detection Prevalence : 0.8235          
##       Balanced Accuracy : 0.7607          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): second print of the confusion matrix already printed above; the output is identical.
ad_tda_kde_5.50.5_n2_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7038  1006
##      >50K     378  1346
##                                           
##                Accuracy : 0.8583          
##                  95% CI : (0.8512, 0.8652)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5736          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9490          
##             Specificity : 0.5723          
##          Pos Pred Value : 0.8749          
##          Neg Pred Value : 0.7807          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7205          
##    Detection Prevalence : 0.8235          
##       Balanced Accuracy : 0.7607          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics (accuracy, kappa, accuracy bounds, p-values)
ad_tda_kde_5.50.5_n2_svm_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.583129e-01   5.735987e-01   8.512392e-01   8.651721e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  2.550417e-130   9.827767e-64
# Keep the test-set accuracy as a named length-1 vector
ad_tda_kde_5.50.5_n2_svm_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n2_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics
ad_tda_kde_5.50.5_n2_svm_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9490291            0.5722789            0.8749378 
##       Neg Pred Value            Precision               Recall 
##            0.7807425            0.8749378            0.9490291 
##                   F1           Prevalence       Detection Rate 
##            0.9104787            0.7592138            0.7205160 
## Detection Prevalence    Balanced Accuracy 
##            0.8235053            0.7606540
# Precision, recall and F1 (elements 5 to 7 of byClass, selected by name)
ad_tda_kde_5.50.5_n2_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n2_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted SVM classifiers

### 3-fold diff

# Fold-wise accuracy difference: ad_rf_fit_re minus the node-2 TDA-assisted SVM
# fold accuracies (positive values mean ad_rf_fit_re is higher).
diff_tda_kde_5.50.5_svm_n2_3_fold <- ad_rf_fit_re - ad_tda_kde_5.50.5_n2_svm_fit_re
diff_tda_kde_5.50.5_svm_n2_3_fold
##     Accuracy
## 1 0.04861843
## 2 0.05101481
## 3 0.04961651
## Bayesian tests on the 3-fold differences (node 2)

# Bayesian sign test on the three fold differences, bounds -0.01 / 0.01
bst_tda_kde_5.50.5_svm.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold), -0.01, 0.01
)
bst_tda_kde_5.50.5_svm.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" versus "right" from the sign test above (probLeft / probRight)
bst_tda_kde_5.50.5_svm.n2_3_fold_odds.left <-
  bst_tda_kde_5.50.5_svm.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n2_3_fold[["probRight"]]
bst_tda_kde_5.50.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the three fold differences, bounds -0.01 / 0.01
bsr_tda_kde_5.50.5_svm.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.50.5_svm.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.008466667
## 
## $winRight
## [1] 0.9915333
# Correlated Bayesian t-test on the three fold differences, bounds -0.01 / 0.01.
# NOTE(review): the second argument (0.1) is presumably the fold correlation
# rho. The usual heuristic is 1/k, i.e. 1/3 for the 3-fold resampling used
# here - confirm that 0.1 is intended.
bct_tda_kde_5.50.5_svm.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_svm.n2_3_fold
## $left
## [1] 9.017033e-05
## 
## $rope
## [1] 0.0001134954
## 
## $right
## [1] 0.9997963
# ROPE plot for the node-2 3-fold accuracy differences, ROPE = [-0.01, 0.01]
plot(rope(diff_tda_kde_5.50.5_svm_n2_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_kde_5.50.5_svm.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold))
#bf_tda_kde_5.50.5_svm.n2_3_fold

# One-sample t-test of the fold differences against a true mean of 0
t.test(x = as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold)
## t = 71.584, df = 2, p-value = 0.0001951
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.04675965 0.05274018
## sample estimates:
##  mean of x 
## 0.04974992
### Test-set difference (node 2): rf_cf_ov_acc minus the TDA-assisted SVM
### test accuracy. The result is a single named value (see output below).
diff_tda_kde_5.50.5_svm.n2_test <- rf_cf_ov_acc - ad_tda_kde_5.50.5_n2_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n2_test
##     Accuracy 
## 0.0003071253
## Bayesian tests on the test-set difference (node 2)

# Bayesian sign test with bounds -0.01 / 0.01. The input is the single
# test-set accuracy difference, and the output below reports probRope = 1.
bst_tda_kde_5.50.5_svm.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm.n2_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_svm.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test above.
# Both probabilities are 0 here, so the ratio is 0/0 and prints NaN.
bst_tda_kde_5.50.5_svm.n2_test_odds.left <-
  bst_tda_kde_5.50.5_svm.n2_test[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n2_test[["probRight"]]
bst_tda_kde_5.50.5_svm.n2_test_odds.left
## [1] NaN
# Bayesian signed-rank test on the single test-set difference,
# using the same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.50.5_svm.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm.n2_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_svm.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Correlated Bayesian t-test on the single test-set difference
# (arguments: differences, 0.1, then the bounds -0.01 and 0.01).
# The output below is NA for left, rope and right.
# NOTE(review): presumably a spread cannot be estimated from one value - confirm.
bct_tda_kde_5.50.5_svm.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm.n2_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_svm.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n2_test))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n2_test)) #bf_tda_kde_5.50.5_svm.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n2_test))

##Node3

# Fit a radial-kernel SVM (method "svmRadial") on the node-3 TDA subset,
# tuning over svmGrid with the resampling scheme in fitControl and selecting
# the model by accuracy. Predictors are centred and scaled first.
# Changes from the original call: `T` -> `TRUE` (T is an ordinary variable and
# can be reassigned) and the abbreviated `preProc` is spelled out as
# `preProcess` instead of relying on partial argument matching.
# NOTE(review): the knit log below reports zero-variance dummy columns that
# cannot be scaled; consider adding "zv" to preProcess if dropping them is OK.
# NOTE(review): `Importance` looks carried over from the random-forest calls;
# confirm the SVM backend actually uses it.
Adult_TDA_KDE_5.50.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.50.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 11634 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7755, 7756, 7757 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa      
##    0.1   0.25  0.7885505  0.379228039
##    0.1   0.50  0.7985215  0.429985308
##    0.1   0.75  0.8013581  0.448513619
##    0.1   1.00  0.8013583  0.455557182
##    0.1   1.25  0.8022177  0.462238954
##    1.0   0.25  0.7519339  0.173527029
##    1.0   0.50  0.7653426  0.251618382
##    1.0   0.75  0.7716172  0.293702811
##    1.0   1.00  0.7735083  0.316325258
##    1.0   1.25  0.7746258  0.333441663
##   10.0   0.25  0.7243425  0.001801901
##   10.0   0.50  0.7270928  0.025341098
##   10.0   0.75  0.7306169  0.057745494
##   10.0   1.00  0.7367196  0.100794855
##   10.0   1.25  0.7385245  0.122084010
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
Adult_TDA_KDE_5.50.5_n3_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7994328 0.4527386    Fold1
## 2 0.8006190 0.4583335    Fold3
## 3 0.8066013 0.4756447    Fold2
# Keep the per-fold cross-validation accuracy of the n3 SVM as a one-column
# data frame (column "Accuracy"); it is subtracted fold-wise further down.
ad_tda_kde_5.50.5_n3_svm_fit_re <-
  Adult_TDA_KDE_5.50.5_n3_SvmFit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.50.5_n3_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Variable-importance plot, left disabled:
# vip(Adult_TDA_KDE_5.50.5_n3_SvmFit0, 25) + ggtitle("Adult_TDA_KDE_5.50.5_n3_SvmFit TDA-Assisted SVM")

# Score the rows of the test data frame with the fitted n3 SVM.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n3_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels.
ad_tda_kde_5.50.5_n3_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n3_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7037  1081
##      >50K     379  1271
##                                           
##                Accuracy : 0.8505          
##                  95% CI : (0.8433, 0.8575)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5448          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9489          
##             Specificity : 0.5404          
##          Pos Pred Value : 0.8668          
##          Neg Pred Value : 0.7703          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7204          
##    Detection Prevalence : 0.8311          
##       Balanced Accuracy : 0.7446          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n3_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7037  1081
##      >50K     379  1271
##                                           
##                Accuracy : 0.8505          
##                  95% CI : (0.8433, 0.8575)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5448          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9489          
##             Specificity : 0.5404          
##          Pos Pred Value : 0.8668          
##          Neg Pred Value : 0.7703          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7204          
##    Detection Prevalence : 0.8311          
##       Balanced Accuracy : 0.7446          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n3_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.505324e-01   5.448034e-01   8.433060e-01   8.575490e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  8.720880e-110   3.552727e-75
# Test-set accuracy of the n3 SVM, kept as a named length-one vector.
ad_tda_kde_5.50.5_n3_svm_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n3_svm_cf0$overall["Accuracy"]
ad_tda_kde_5.50.5_n3_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9488943            0.5403912            0.8668391 
##       Neg Pred Value            Precision               Recall 
##            0.7703030            0.8668391            0.9488943 
##                   F1           Prevalence       Detection Rate 
##            0.9060126            0.7592138            0.7204136 
## Detection Prevalence    Balanced Accuracy 
##            0.8310811            0.7446427
# Precision, recall and F1 of the n3 SVM on the test set (named vector).
ad_tda_kde_5.50.5_n3_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n3_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Bayesian comparison: baseline `ad_rf_fit_re` vs. the TDA-assisted n3 SVM
# NOTE(review): `ad_rf_fit_re` is defined outside this section; presumably it
# holds the per-fold accuracy of the non-TDA random forest -- confirm.

### 3-fold diff
# Row-wise difference of the two per-fold accuracy columns; positive values
# mean the baseline scored higher than the TDA-assisted SVM on that row.
diff_tda_kde_5.50.5_svm_n3_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n3_svm_fit_re
diff_tda_kde_5.50.5_svm_n3_3_fold
##     Accuracy
## 1 0.05735273
## 2 0.05789636
## 3 0.05217729
## Bayesian Tests 3-fold diff

# Bayesian sign test on the per-fold accuracy differences.
# -0.01 and 0.01 are presumably the lower/upper ROPE limits (the result
# reports a `probRope` component) -- confirm against the helper's definition.
bst_tda_kde_5.50.5_svm.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" against "right" from the Bayesian sign test: the ratio of
# the two posterior probabilities returned above.
bst_tda_kde_5.50.5_svm.n3_3_fold_odds.left <-
  bst_tda_kde_5.50.5_svm.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n3_3_fold[["probRight"]]
bst_tda_kde_5.50.5_svm.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same per-fold differences, called with the
# same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.50.5_svm.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009
## 
## $winRight
## [1] 0.991
# Bayesian correlated t-test on the per-fold differences.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds, followed by the -0.01 / 0.01 bounds -- confirm.
bct_tda_kde_5.50.5_svm.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n3_3_fold
## $left
## [1] 0.0005105301
## 
## $rope
## [1] 0.0005413951
## 
## $right
## [1] 0.9989481
# ROPE plot of the per-fold differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.50.5_svm_n3_3_fold, c(-0.01, 0.01))
)

# Bayes factor t-test, left disabled:
# bf_tda_kde_5.50.5_svm.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold))
# bf_tda_kde_5.50.5_svm.n3_3_fold

# Frequentist one-sample t-test of the per-fold differences (default mu = 0).
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold)
## t = 30.622, df = 2, p-value = 0.001065
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.04796711 0.06365048
## sample estimates:
##  mean of x 
## 0.05580879
### Test set diff
# Difference between the baseline test-set accuracy (`rf_cf_ov_acc`, defined
# outside this section) and the n3 SVM test-set accuracy: a single value.
diff_tda_kde_5.50.5_svm.n3_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n3_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n3_test
##    Accuracy 
## 0.008087633
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference, called with the same
# -0.01 / 0.01 bounds as the 3-fold comparison.
bst_tda_kde_5.50.5_svm.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the test-set sign test.
# NOTE(review): in the recorded run both probLeft and probRight are 0 for this
# single difference, so the ratio is 0/0 and prints NaN; treat it as undefined.
bst_tda_kde_5.50.5_svm.n3_test_odds.left <-
  bst_tda_kde_5.50.5_svm.n3_test[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n3_test[["probRight"]]
bst_tda_kde_5.50.5_svm.n3_test_odds.left
## [1] NaN
# Bayesian signed-rank test on the single test-set difference, same bounds.
bsr_tda_kde_5.50.5_svm.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the single test-set difference.
# NOTE(review): the recorded result is NA for left, rope and right; presumably
# a spread cannot be estimated from one observation -- confirm before citing.
bct_tda_kde_5.50.5_svm.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n3_test))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n3_test))
#bf_tda_kde_5.50.5_svm.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n3_test))

##Node4

# Fit a radial-basis-function SVM (caret method "svmRadial") on the n4 training
# subset, tuning over `svmGrid` under the resampling scheme in `fitControl`
# (both defined elsewhere in the file) and selecting the model by accuracy.
# Predictors are centred and scaled before fitting.
# NOTE(review): the recorded run warns that several predictors have zero
# variance in this subset; adding "zv" to `preProc` would drop them, but it is
# not done here because it would change the fitted model.
Adult_TDA_KDE_5.50.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n4.vec,
  # Spelled TRUE rather than T: `T` is an ordinary variable that can be
  # reassigned, whereas TRUE is a reserved constant.
  # NOTE(review): `Importance` is not a formal argument of train(), so it is
  # forwarded through `...`; confirm the SVM backend actually uses it.
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.50.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 10038 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6691, 6692, 6693 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa     
##    0.1   0.25  0.8237695  0.31328546
##    0.1   0.50  0.8328351  0.39162873
##    0.1   0.75  0.8355246  0.41869496
##    0.1   1.00  0.8376168  0.43522971
##    0.1   1.25  0.8357241  0.43694640
##    1.0   0.25  0.8032474  0.12347358
##    1.0   0.50  0.8109181  0.20267938
##    1.0   0.75  0.8136083  0.24582751
##    1.0   1.00  0.8165965  0.28184130
##    1.0   1.25  0.8160989  0.29695022
##   10.0   0.25  0.7908946  0.00000000
##   10.0   0.50  0.7915919  0.01127624
##   10.0   0.75  0.7932856  0.03911334
##   10.0   1.00  0.7939830  0.07201532
##   10.0   1.25  0.7974699  0.10670093
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
Adult_TDA_KDE_5.50.5_n4_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8389603 0.4405910    Fold1
## 2 0.8374178 0.4315942    Fold2
## 3 0.8364723 0.4335039    Fold3
# Keep the per-fold cross-validation accuracy of the n4 SVM as a one-column
# data frame (column "Accuracy").
ad_tda_kde_5.50.5_n4_svm_fit_re <-
  Adult_TDA_KDE_5.50.5_n4_SvmFit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.50.5_n4_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Variable-importance plot, left disabled:
# vip(Adult_TDA_KDE_5.50.5_n4_SvmFit0, 25) + ggtitle("Adult_TDA_KDE_5.50.5_n4_SvmFit TDA-Assisted SVM")

# Score the rows of the test data frame with the fitted n4 SVM.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n4_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels.
ad_tda_kde_5.50.5_n4_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n4_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7140  1509
##      >50K     276   843
##                                           
##                Accuracy : 0.8173          
##                  95% CI : (0.8095, 0.8249)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3912          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9628          
##             Specificity : 0.3584          
##          Pos Pred Value : 0.8255          
##          Neg Pred Value : 0.7534          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7310          
##    Detection Prevalence : 0.8854          
##       Balanced Accuracy : 0.6606          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same node-4 confusion matrix object (output repeats the
# block printed directly above).
ad_tda_kde_5.50.5_n4_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7140  1509
##      >50K     276   843
##                                           
##                Accuracy : 0.8173          
##                  95% CI : (0.8095, 0.8249)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3912          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9628          
##             Specificity : 0.3584          
##          Pos Pred Value : 0.8255          
##          Neg Pred Value : 0.7534          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7310          
##    Detection Prevalence : 0.8854          
##       Balanced Accuracy : 0.6606          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-4 confusion matrix
ad_tda_kde_5.50.5_n4_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.172604e-01   3.912255e-01   8.094513e-01   8.248799e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   9.148711e-44  6.189516e-187
# Store the test-set accuracy (the first, "Accuracy", element of `$overall`)
ad_tda_kde_5.50.5_n4_svm_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n4_svm_cf0$overall["Accuracy"]
# Per-class statistics of the same confusion matrix
ad_tda_kde_5.50.5_n4_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9627832            0.3584184            0.8255290 
##       Neg Pred Value            Precision               Recall 
##            0.7533512            0.8255290            0.9627832 
##                   F1           Prevalence       Detection Rate 
##            0.8888889            0.7592138            0.7309582 
## Detection Prevalence    Balanced Accuracy 
##            0.8854423            0.6606008
# Keep precision, recall and F1 (elements 5 to 7 of `$byClass`)
ad_tda_kde_5.50.5_n4_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n4_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted SVM (node 4) classifiers
# NOTE(review): `ad_rf_fit_re` is defined elsewhere in the file; presumably the
# per-fold accuracy of the non-TDA-assisted random forest -- confirm.

### 3-fold diff

# Row-wise difference of per-fold accuracies: baseline minus node-4 SVM
diff_tda_kde_5.50.5_svm_n4_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n4_svm_fit_re
diff_tda_kde_5.50.5_svm_n4_3_fold
##     Accuracy
## 1 0.01782531
## 2 0.02109759
## 3 0.02230628
## Bayesian Tests 3-fold diff

# Bayesian sign test on the per-fold differences; -0.01 and 0.01 are passed
# positionally (presumably the ROPE bounds -- helper is defined elsewhere).

bst_tda_kde_5.50.5_svm.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n4_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" versus "right" from the Bayesian sign test
# (ratio probLeft / probRight)

bst_tda_kde_5.50.5_svm.n4_3_fold_odds.left <-
  bst_tda_kde_5.50.5_svm.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n4_3_fold[["probRight"]]
bst_tda_kde_5.50.5_svm.n4_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same per-fold differences

bsr_tda_kde_5.50.5_svm.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n4_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0383
## 
## $winRight
## [1] 0.9617
# Bayesian correlated t-test on the per-fold differences.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds -- confirm against the helper's definition.

bct_tda_kde_5.50.5_svm.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n4_3_fold
## $left
## [1] 0.001286568
## 
## $rope
## [1] 0.009383813
## 
## $right
## [1] 0.9893296
# ROPE plot of the per-fold differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.50.5_svm_n4_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_kde_5.50.5_svm.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold))
#bf_tda_kde_5.50.5_svm.n4_3_fold

# Frequentist one-sample t-test of the per-fold differences against 0
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold)
## t = 15.248, df = 2, p-value = 0.004273
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.01465068 0.02616877
## sample estimates:
##  mean of x 
## 0.02040973
### Test set diff
# Single test-set accuracy difference: baseline (`rf_cf_ov_acc`, defined
# elsewhere) minus the node-4 SVM.
diff_tda_kde_5.50.5_svm.n4_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n4_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n4_test
##   Accuracy 
## 0.04135954
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set difference.
# NOTE(review): the input here is a single value, so the result rests on one
# observation only.

bst_tda_kde_5.50.5_svm.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the test-set sign test
# (ratio probLeft / probRight)

bst_tda_kde_5.50.5_svm.n4_test_odds.left <-
  bst_tda_kde_5.50.5_svm.n4_test[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n4_test[["probRight"]]
bst_tda_kde_5.50.5_svm.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference

bsr_tda_kde_5.50.5_svm.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1565
## 
## $winRight
## [1] 0.8435
# Bayesian correlated t-test on the test-set difference.
# With this single-value input the helper returns NA for left/rope/right
# (see the printed result); presumably because a spread cannot be estimated
# from one observation -- confirm.

bct_tda_kde_5.50.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n4_test))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n4_test))
#bf_tda_kde_5.50.5_svm.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n4_test))

##Node5

# Fit the node-5 TDA/KDE-assisted radial-basis SVM with caret.
#   - outcome: `adult_df1` coerced to a factor; all other columns are predictors
#   - predictors are centered and scaled before fitting
#   - `fitControl` and `svmGrid` (resampling scheme and sigma/C grid) are
#     defined elsewhere in the file
#   - the model is selected on cross-validated Accuracy
# NOTE(review): the model is named "n5" but is trained on
# `tda.m_kde_adult_5.50.5.n3.vec`. This looks like a copy-paste slip for the
# node-5 data object; confirm the intended data set before relying on the
# node-5 results.
# NOTE(review): `Importance` is forwarded to the underlying fitter through
# `...`; confirm it has any effect for method "svmRadial".
Adult_TDA_KDE_5.50.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  Importance = TRUE, # spelled out: `T` is an ordinary, reassignable variable
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the caret fit for the node-5 SVM: resampling results per (sigma, C)
# pair and the parameters finally selected.
Adult_TDA_KDE_5.50.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 11634 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## Pre-processing: centered (108), scaled (108) 
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7756, 7756, 7756 
## Resampling results across tuning parameters:
## 
##   sigma  C     Accuracy   Kappa      
##    0.1   0.25  0.7869177  0.372106305
##    0.1   0.50  0.7983497  0.430093175
##    0.1   0.75  0.8017019  0.449378082
##    0.1   1.00  0.8019598  0.457247054
##    0.1   1.25  0.8019598  0.461313260
##    1.0   0.25  0.7520199  0.171972569
##    1.0   0.50  0.7623345  0.242273554
##    1.0   0.75  0.7682654  0.284635308
##    1.0   1.00  0.7719615  0.313133216
##    1.0   1.25  0.7725632  0.327961061
##   10.0   0.25  0.7243424  0.002359636
##   10.0   0.50  0.7263194  0.021100751
##   10.0   0.75  0.7298436  0.050548989
##   10.0   1.00  0.7344851  0.087034761
##   10.0   1.25  0.7362902  0.107966831
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold cross-validation metrics of the selected node-5 model
Adult_TDA_KDE_5.50.5_n5_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7960289 0.4447181    Fold1
## 2 0.8073749 0.4811562    Fold3
## 3 0.8024755 0.4580655    Fold2
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the baseline further down.
ad_tda_kde_5.50.5_n5_svm_fit_re <-
  Adult_TDA_KDE_5.50.5_n5_SvmFit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.50.5_n5_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
#vip(Adult_TDA_KDE_5.50.5_n5_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.50.5_n5_SvmFit TDA-Assisted Svm")

# Score the held-out test set with the node-5 SVM
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n5_SvmFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the test-set labels
ad_tda_kde_5.50.5_n5_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n5_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7037  1081
##      >50K     379  1271
##                                           
##                Accuracy : 0.8505          
##                  95% CI : (0.8433, 0.8575)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5448          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9489          
##             Specificity : 0.5404          
##          Pos Pred Value : 0.8668          
##          Neg Pred Value : 0.7703          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7204          
##    Detection Prevalence : 0.8311          
##       Balanced Accuracy : 0.7446          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same node-5 confusion matrix object (output repeats the
# block printed directly above).
ad_tda_kde_5.50.5_n5_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7037  1081
##      >50K     379  1271
##                                           
##                Accuracy : 0.8505          
##                  95% CI : (0.8433, 0.8575)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5448          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9489          
##             Specificity : 0.5404          
##          Pos Pred Value : 0.8668          
##          Neg Pred Value : 0.7703          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7204          
##    Detection Prevalence : 0.8311          
##       Balanced Accuracy : 0.7446          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the node-5 confusion matrix
ad_tda_kde_5.50.5_n5_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.505324e-01   5.448034e-01   8.433060e-01   8.575490e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  8.720880e-110   3.552727e-75
# Store the test-set accuracy (the first, "Accuracy", element of `$overall`)
ad_tda_kde_5.50.5_n5_svm_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n5_svm_cf0$overall["Accuracy"]
# Per-class statistics of the same confusion matrix
ad_tda_kde_5.50.5_n5_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9488943            0.5403912            0.8668391 
##       Neg Pred Value            Precision               Recall 
##            0.7703030            0.8668391            0.9488943 
##                   F1           Prevalence       Detection Rate 
##            0.9060126            0.7592138            0.7204136 
## Detection Prevalence    Balanced Accuracy 
##            0.8310811            0.7446427
# Keep precision, recall and F1 (elements 5 to 7 of `$byClass`)
ad_tda_kde_5.50.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n5_svm_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted SVM (node 5) classifiers
# NOTE(review): `ad_rf_fit_re` is defined elsewhere in the file; presumably the
# per-fold accuracy of the non-TDA-assisted random forest -- confirm.

### 3-fold diff

# Row-wise difference of per-fold accuracies: baseline minus node-5 SVM.
# NOTE(review): the node-5 `$resample` table prints its rows as Fold1, Fold3,
# Fold2, and the subtraction pairs rows by position, not by fold label --
# verify the pairing is the intended one.
diff_tda_kde_5.50.5_svm_n5_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n5_svm_fit_re
diff_tda_kde_5.50.5_svm_n5_3_fold
##     Accuracy
## 1 0.06075669
## 2 0.05114046
## 3 0.05630312
## Bayesian Tests 3-fold diff

# Bayesian sign test on the per-fold differences; -0.01 and 0.01 are passed
# positionally (presumably the ROPE bounds -- helper is defined elsewhere).

bst_tda_kde_5.50.5_svm.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n5_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds of "left" versus "right" from the Bayesian sign test
# (ratio probLeft / probRight)

bst_tda_kde_5.50.5_svm.n5_3_fold_odds.left <-
  bst_tda_kde_5.50.5_svm.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n5_3_fold[["probRight"]]
bst_tda_kde_5.50.5_svm.n5_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same per-fold differences

bsr_tda_kde_5.50.5_svm.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n5_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0088
## 
## $winRight
## [1] 0.9912
# Bayesian correlated t-test on the per-fold differences.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds -- confirm against the helper's definition.

bct_tda_kde_5.50.5_svm.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n5_3_fold
## $left
## [1] 0.001174964
## 
## $rope
## [1] 0.001232736
## 
## $right
## [1] 0.9975923
# ROPE plot of the per-fold differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.50.5_svm_n5_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_kde_5.50.5_svm.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold))
#bf_tda_kde_5.50.5_svm.n5_3_fold

# Frequentist one-sample t-test of the per-fold differences against 0
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold)
## t = 20.179, df = 2, p-value = 0.002447
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.04411192 0.06802160
## sample estimates:
##  mean of x 
## 0.05606676
### Test set diff
# Single test-set accuracy difference: baseline (`rf_cf_ov_acc`, defined
# elsewhere) minus the node-5 SVM.
diff_tda_kde_5.50.5_svm.n5_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n5_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n5_test
##    Accuracy 
## 0.008087633
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set difference.
# NOTE(review): the input here is a single value, so the result rests on one
# observation only.

bst_tda_kde_5.50.5_svm.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the test-set sign test
# (ratio probLeft / probRight). Both probabilities are 0 in the result printed
# above, so this ratio is 0 / 0 and evaluates to NaN.

bst_tda_kde_5.50.5_svm.n5_test_odds.left <-
  bst_tda_kde_5.50.5_svm.n5_test[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n5_test[["probRight"]]
bst_tda_kde_5.50.5_svm.n5_test_odds.left
## [1] NaN
# Bayesian signed-rank test on the test-set difference

bsr_tda_kde_5.50.5_svm.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the test-set difference.
# With this single-value input the helper returns NA for left/rope/right
# (see the printed result); presumably because a spread cannot be estimated
# from one observation -- confirm.

bct_tda_kde_5.50.5_svm.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n5_test))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n5_test))
#bf_tda_kde_5.50.5_svm.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n5_test))


#Non-TDA-Assisted

# Tuning grid for the neural network: every combination of hidden-layer size
# (2, 3, 5, 7) and weight decay (0.3, 0.5, 0.7), i.e. 12 candidates.
nn1Grid <- expand.grid(size = c(2, 3, 5, 7), decay = c(0.3, 0.5, 0.7))

# Neural network fitted with caret on the full (non-TDA) one-hot training set.
#   - outcome: `adult_df1` coerced to a factor; all other columns are predictors
#   - `fitControl` (resampling scheme) is defined elsewhere in the file
#   - the model is selected on cross-validated Accuracy
# NOTE(review): several fits in the training trace end with
# "stopped after 100 iterations" rather than "converged"; consider whether the
# iteration limit should be raised.
# NOTE(review): `Importance` is forwarded to the underlying fitter through
# `...`; confirm it has any effect for method "nnet".
adultNn1Fit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  Importance = TRUE, # spelled out: `T` is an ordinary, reassignable variable
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 14548.581548 
## final  value 8389.316919 
## converged
## # weights:  331
## initial  value 13261.985084 
## final  value 8389.251955 
## converged
## # weights:  551
## initial  value 9505.289847 
## iter  10 value 8127.909645
## iter  20 value 7705.626465
## iter  30 value 7580.565673
## iter  40 value 7390.912265
## iter  50 value 7292.717057
## iter  60 value 7241.047880
## iter  70 value 7032.607063
## iter  80 value 6474.816749
## iter  90 value 5929.154853
## iter 100 value 5774.324333
## final  value 5774.324333 
## stopped after 100 iterations
## # weights:  771
## initial  value 8918.219637 
## iter  10 value 8064.209312
## iter  20 value 7730.372203
## iter  30 value 7651.155247
## iter  40 value 7588.913980
## iter  50 value 7504.112055
## iter  60 value 7357.560443
## iter  70 value 7194.703772
## iter  80 value 6348.261745
## iter  90 value 6135.759374
## iter 100 value 5494.808818
## final  value 5494.808818 
## stopped after 100 iterations
## # weights:  221
## initial  value 21809.731847 
## iter  10 value 8410.836456
## iter  20 value 8410.138588
## iter  30 value 8297.927215
## iter  40 value 8292.596798
## iter  50 value 8292.583353
## iter  60 value 8184.729647
## iter  70 value 8134.729629
## iter  80 value 7742.215239
## iter  90 value 7714.045999
## iter 100 value 7707.307397
## final  value 7707.307397 
## stopped after 100 iterations
## # weights:  331
## initial  value 8465.704937 
## iter  10 value 8145.013208
## iter  20 value 7747.280823
## iter  30 value 7721.975702
## iter  40 value 7702.825642
## iter  50 value 7573.575868
## iter  60 value 7122.002939
## iter  70 value 6376.175773
## iter  80 value 6271.468461
## iter  90 value 6201.319576
## iter 100 value 5619.269573
## final  value 5619.269573 
## stopped after 100 iterations
## # weights:  551
## initial  value 8565.061426 
## iter  10 value 8252.002177
## iter  20 value 8246.076400
## iter  30 value 8244.190142
## iter  40 value 7687.243118
## iter  50 value 7611.299048
## iter  60 value 7469.278849
## iter  70 value 7369.824884
## iter  80 value 7351.879627
## iter  90 value 7345.879460
## iter 100 value 7342.244445
## final  value 7342.244445 
## stopped after 100 iterations
## # weights:  771
## initial  value 12988.749828 
## iter  10 value 8377.773032
## iter  20 value 7785.821091
## iter  30 value 7745.149913
## iter  40 value 7680.792232
## iter  50 value 7652.089827
## iter  60 value 7601.691086
## iter  70 value 7572.728827
## iter  80 value 7326.203781
## iter  90 value 6991.445116
## iter 100 value 6352.394906
## final  value 6352.394906 
## stopped after 100 iterations
## # weights:  221
## initial  value 15591.118152 
## iter  10 value 8314.210316
## iter  20 value 7753.179620
## iter  30 value 7728.667963
## iter  40 value 7583.602375
## iter  50 value 7483.323821
## iter  60 value 7414.527613
## iter  70 value 7345.504290
## iter  80 value 7305.037147
## iter  90 value 6902.310845
## iter 100 value 6096.738387
## final  value 6096.738387 
## stopped after 100 iterations
## # weights:  331
## initial  value 13697.848791 
## iter  10 value 8266.585251
## iter  20 value 7660.552751
## iter  30 value 7574.006118
## iter  40 value 7558.482703
## iter  50 value 7515.035583
## iter  60 value 7456.177312
## iter  70 value 7376.751322
## iter  80 value 6924.531216
## iter  90 value 6334.739091
## iter 100 value 5869.895931
## final  value 5869.895931 
## stopped after 100 iterations
## # weights:  551
## initial  value 12280.312637 
## iter  10 value 8270.262344
## iter  20 value 7808.520059
## iter  30 value 7581.215030
## iter  40 value 7541.832555
## iter  50 value 7512.732347
## iter  60 value 7446.493040
## iter  70 value 7410.335408
## iter  80 value 7346.736220
## iter  90 value 7309.492872
## iter 100 value 7206.326378
## final  value 7206.326378 
## stopped after 100 iterations
## # weights:  771
## initial  value 9489.523994 
## iter  10 value 7807.145692
## iter  20 value 7740.100381
## iter  30 value 7711.446920
## iter  40 value 7664.282349
## iter  50 value 7528.705413
## iter  60 value 7315.779895
## iter  70 value 7264.946723
## iter  80 value 7173.838741
## iter  90 value 6565.621735
## iter 100 value 5600.847686
## final  value 5600.847686 
## stopped after 100 iterations
## # weights:  221
## initial  value 8499.650765 
## iter  10 value 8238.602024
## iter  20 value 7771.578514
## iter  30 value 7632.471860
## iter  40 value 7605.350195
## iter  50 value 7597.598519
## iter  60 value 7563.136291
## iter  70 value 7361.903102
## iter  80 value 6790.728894
## iter  90 value 5909.667577
## iter 100 value 5474.174827
## final  value 5474.174827 
## stopped after 100 iterations
## # weights:  331
## initial  value 12942.141421 
## iter  10 value 8267.759111
## iter  20 value 8262.101253
## iter  30 value 8233.712392
## iter  40 value 8065.102197
## iter  50 value 7698.093867
## iter  60 value 7233.088056
## iter  70 value 6948.272049
## iter  80 value 6910.761451
## iter  90 value 6898.592899
## iter 100 value 6888.757918
## final  value 6888.757918 
## stopped after 100 iterations
## # weights:  551
## initial  value 11219.387799 
## iter  10 value 8240.885600
## iter  20 value 7871.635899
## iter  30 value 7774.781347
## iter  40 value 7774.390739
## iter  50 value 7681.143343
## iter  60 value 7624.252860
## iter  70 value 7615.506646
## iter  80 value 7604.485205
## iter  90 value 7594.308480
## iter 100 value 7581.984729
## final  value 7581.984729 
## stopped after 100 iterations
## # weights:  771
## initial  value 10739.870293 
## iter  10 value 7856.447196
## iter  20 value 7789.279797
## iter  30 value 7788.973952
## iter  40 value 7758.448143
## iter  50 value 7620.191729
## iter  60 value 7522.801656
## iter  70 value 7505.296018
## iter  80 value 7446.117130
## iter  90 value 7330.333991
## iter 100 value 7307.907366
## final  value 7307.907366 
## stopped after 100 iterations
## # weights:  221
## initial  value 15489.573310 
## final  value 8388.355832 
## converged
## # weights:  331
## initial  value 8635.513297 
## iter  10 value 8385.182909
## iter  20 value 8383.515651
## iter  30 value 7972.208962
## iter  40 value 7875.696107
## iter  50 value 7804.841942
## iter  60 value 7694.932311
## iter  70 value 7624.687267
## iter  80 value 7563.788469
## iter  90 value 7512.367949
## iter 100 value 7456.369843
## final  value 7456.369843 
## stopped after 100 iterations
## # weights:  551
## initial  value 8686.432030 
## iter  10 value 7963.183767
## iter  20 value 7783.155726
## iter  30 value 7765.480133
## iter  40 value 7671.234084
## iter  50 value 7604.563055
## iter  60 value 7550.753673
## iter  70 value 7473.729951
## iter  80 value 7418.171544
## iter  90 value 7079.497314
## iter 100 value 6930.882204
## final  value 6930.882204 
## stopped after 100 iterations
## # weights:  771
## initial  value 12694.722565 
## iter  10 value 8075.354307
## iter  20 value 7789.820625
## iter  30 value 7783.656527
## iter  40 value 7673.272790
## iter  50 value 7603.499703
## iter  60 value 7595.150039
## iter  70 value 7587.037509
## iter  80 value 7572.539040
## iter  90 value 7564.372453
## iter 100 value 7465.044407
## final  value 7465.044407 
## stopped after 100 iterations
## # weights:  221
## initial  value 13670.393422 
## iter  10 value 8331.515641
## iter  20 value 7744.884137
## iter  30 value 7643.809835
## iter  40 value 7632.506579
## iter  50 value 7454.680418
## iter  60 value 7364.113780
## iter  70 value 7132.030724
## iter  80 value 7021.731748
## iter  90 value 6837.802552
## iter 100 value 6436.400178
## final  value 6436.400178 
## stopped after 100 iterations
## # weights:  331
## initial  value 12382.193508 
## iter  10 value 8272.299664
## iter  20 value 7827.163613
## iter  30 value 7826.048907
## iter  40 value 7825.993585
## iter  50 value 7805.171497
## iter  60 value 7658.660347
## iter  70 value 7637.175610
## iter  80 value 7631.361666
## iter  90 value 7575.723483
## iter 100 value 7478.905527
## final  value 7478.905527 
## stopped after 100 iterations
## # weights:  551
## initial  value 16791.999397 
## iter  10 value 8414.418160
## iter  20 value 8248.702076
## iter  30 value 7787.173721
## iter  40 value 7654.289390
## iter  50 value 7628.116157
## iter  60 value 7527.088828
## iter  70 value 7232.888928
## iter  80 value 7099.634066
## iter  90 value 6689.108443
## iter 100 value 5859.003719
## final  value 5859.003719 
## stopped after 100 iterations
## # weights:  771
## initial  value 13994.478018 
## iter  10 value 8328.087863
## iter  20 value 7822.270077
## iter  30 value 7750.163027
## iter  40 value 7687.449672
## iter  50 value 7627.373426
## iter  60 value 7286.942350
## iter  70 value 7031.148738
## iter  80 value 6628.553989
## iter  90 value 5802.161723
## iter 100 value 5542.857066
## final  value 5542.857066 
## stopped after 100 iterations
## # weights:  221
## initial  value 14672.488444 
## iter  10 value 8164.125165
## iter  20 value 7688.946277
## iter  30 value 7616.520442
## iter  40 value 7342.798570
## iter  50 value 6450.833966
## iter  60 value 6245.025062
## iter  70 value 5787.285906
## iter  80 value 5517.461403
## iter  90 value 5208.023914
## iter 100 value 5026.306172
## final  value 5026.306172 
## stopped after 100 iterations
## # weights:  331
## initial  value 12024.613929 
## iter  10 value 8338.173788
## iter  20 value 8199.674095
## iter  30 value 8106.100266
## iter  40 value 7160.752356
## iter  50 value 6392.534716
## iter  60 value 6007.107484
## iter  70 value 5366.728460
## iter  80 value 5217.464825
## iter  90 value 5025.400916
## iter 100 value 4919.841071
## final  value 4919.841071 
## stopped after 100 iterations
## # weights:  551
## initial  value 11085.459292 
## iter  10 value 8367.183302
## iter  20 value 7774.887022
## iter  30 value 7739.263226
## iter  40 value 7697.478804
## iter  50 value 7638.123535
## iter  60 value 7591.789950
## iter  70 value 7422.788270
## iter  80 value 7367.885736
## iter  90 value 7305.789110
## iter 100 value 6651.088618
## final  value 6651.088618 
## stopped after 100 iterations
## # weights:  771
## initial  value 19249.130209 
## iter  10 value 7980.045537
## iter  20 value 7670.528687
## iter  30 value 7638.017013
## iter  40 value 7624.844120
## iter  50 value 7582.687354
## iter  60 value 7573.851782
## iter  70 value 7544.734345
## iter  80 value 7386.111523
## iter  90 value 7291.907496
## iter 100 value 7121.478671
## final  value 7121.478671 
## stopped after 100 iterations
## # weights:  221
## initial  value 8639.460141 
## iter  10 value 8301.241185
## iter  10 value 8301.241173
## iter  20 value 7962.585510
## iter  30 value 7776.974757
## iter  40 value 7770.244305
## iter  50 value 7742.934863
## iter  60 value 7733.338705
## iter  70 value 7602.904125
## iter  80 value 7516.883837
## iter  90 value 7446.720399
## iter 100 value 7411.933641
## final  value 7411.933641 
## stopped after 100 iterations
## # weights:  331
## initial  value 9536.561339 
## iter  10 value 7970.869683
## iter  20 value 7772.755878
## iter  30 value 7694.966374
## iter  40 value 7594.404951
## iter  50 value 7577.875038
## iter  60 value 7480.075042
## iter  70 value 7443.390754
## iter  80 value 7394.417988
## iter  90 value 6782.059724
## iter 100 value 6374.681601
## final  value 6374.681601 
## stopped after 100 iterations
## # weights:  551
## initial  value 15873.186513 
## iter  10 value 8387.363549
## iter  20 value 8387.104816
## iter  30 value 8319.206589
## iter  40 value 7810.321295
## iter  50 value 7626.368750
## iter  60 value 7565.435314
## iter  70 value 7558.174288
## iter  80 value 7553.399614
## iter  90 value 7546.758136
## iter 100 value 7543.088293
## final  value 7543.088293 
## stopped after 100 iterations
## # weights:  771
## initial  value 8917.377732 
## iter  10 value 7787.962320
## iter  20 value 7760.112612
## iter  30 value 7750.537740
## iter  40 value 7712.663459
## iter  50 value 7696.249040
## iter  60 value 7687.770799
## iter  70 value 7613.744379
## iter  80 value 7554.089556
## iter  90 value 7543.477060
## iter 100 value 7517.258437
## final  value 7517.258437 
## stopped after 100 iterations
## # weights:  221
## initial  value 9647.221252 
## iter  10 value 8386.589990
## iter  20 value 8058.198077
## iter  30 value 7751.011465
## iter  40 value 7749.377947
## final  value 7749.375313 
## converged
## # weights:  331
## initial  value 18626.322738 
## iter  10 value 8321.418177
## iter  20 value 7799.316733
## iter  30 value 7798.363312
## iter  40 value 7791.229522
## iter  50 value 7775.263505
## iter  60 value 7745.860450
## iter  70 value 7644.170840
## iter  80 value 7210.975003
## iter  90 value 6862.677711
## iter 100 value 6763.356817
## final  value 6763.356817 
## stopped after 100 iterations
## # weights:  551
## initial  value 8514.288357 
## iter  10 value 7869.534517
## iter  20 value 7683.902058
## iter  30 value 7615.059347
## iter  40 value 6952.890684
## iter  50 value 6816.948749
## iter  60 value 6225.955986
## iter  70 value 5864.588035
## iter  80 value 5681.086302
## iter  90 value 5326.577845
## iter 100 value 5114.818776
## final  value 5114.818776 
## stopped after 100 iterations
## # weights:  771
## initial  value 14830.129964 
## iter  10 value 8265.106488
## iter  20 value 7778.982903
## iter  30 value 7634.887154
## iter  40 value 7518.686713
## iter  50 value 7480.384417
## iter  60 value 7462.568754
## iter  70 value 7451.279956
## iter  80 value 7422.492853
## iter  90 value 7407.370561
## iter 100 value 7343.403036
## final  value 7343.403036 
## stopped after 100 iterations
## # weights:  771
## initial  value 27434.686025 
## iter  10 value 12570.229169
## iter  20 value 12126.117754
## iter  30 value 11550.474052
## iter  40 value 11459.097934
## iter  50 value 11449.742511
## iter  60 value 11435.954634
## iter  70 value 11393.210999
## iter  80 value 11239.558484
## iter  90 value 11127.231605
## iter 100 value 10976.538999
## final  value 10976.538999 
## stopped after 100 iterations
# Print the fitted train object: resampling setup, Accuracy/Kappa for each
# size/decay combination, and the selected final values.
adultNn1Fit
## Neural Network 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15196, 15195, 15195 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.8117381  0.3778008
##   2     0.5    0.7830478  0.1539123
##   2     0.7    0.7978325  0.2555682
##   3     0.3    0.8019549  0.3189614
##   3     0.5    0.8155145  0.4303865
##   3     0.7    0.8037126  0.3301381
##   5     0.3    0.8107314  0.4435443
##   5     0.5    0.7986223  0.2965459
##   5     0.7    0.8165657  0.4136638
##   7     0.3    0.8153830  0.3923103
##   7     0.5    0.7965602  0.2435871
##   7     0.7    0.8272724  0.4685915
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.7.
# Print the per-fold Accuracy and Kappa stored on the fit.
adultNn1Fit$resample
##    Accuracy     Kappa Resample
## 1 0.8286391 0.5271645    Fold2
## 2 0.8462551 0.5525540    Fold1
## 3 0.8069229 0.3260560    Fold3
# Keep the per-fold Accuracy as a one-column data frame. The column is picked
# by name so the result does not depend on the column order of `resample`.
ad_nn1_fit_re <- adultNn1Fit$resample["Accuracy"]

# Print the architecture and the weights of the final network.
summary(adultNn1Fit)
## a 108-7-1 network with 771 weights
## options were - entropy fitting  decay=0.7
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##    -0.97    -0.29    -0.63     0.49    -0.05     0.01    -0.32    -0.29 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##    -0.46     0.31    -0.04     0.00    -0.11    -0.39    -0.17     0.06 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.18    -0.08    -0.07    -0.27     0.07     0.43     0.57     0.05 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.90    -0.04     0.14    -2.24     0.11    -0.94    -0.06     1.21 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.03     0.09     0.28    -1.58    -0.62     0.18     0.01     0.84 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##    -0.05    -0.08     0.11     0.05    -1.43    -0.14     0.63    -0.36 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##    -0.35    -0.08     0.31     1.19     0.00     0.01    -1.82    -0.31 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##    -0.03    -0.10     0.00     0.15     0.13    -1.16    -1.37     0.40 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.01     0.01     0.42     0.03    -0.03    -0.10    -0.07     0.04 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##    -0.13    -0.01     0.00     0.04     0.02     0.01     0.02    -0.03 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.10     0.05     0.00     0.01     0.01    -0.04     0.08    -0.03 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.03     0.10     0.01     0.03     0.04     0.72     0.01     0.02 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##    -0.01    -0.14     0.11     0.03     0.07     0.02     0.06     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00    -0.01    -1.98    -0.05    -0.03 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00    -0.01     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.00    -0.10     0.00     0.00     0.00     0.00     0.00    -0.01 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.00     0.00     0.00    -0.01     0.00     0.00     0.00     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.00     0.00     0.00     0.00    -0.06     0.00     0.00    -0.01 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##    -0.01     0.00     0.00    -0.01     0.00     0.00     0.00     0.00 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.00     0.00     0.00     0.00     0.00    -0.01     0.00     0.00 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##    -0.29     0.22    -0.17     0.00     0.00     0.00     0.00     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h6   i1->h6   i2->h6   i3->h6   i4->h6   i5->h6   i6->h6   i7->h6 
##     1.09     0.23     0.15     0.48    -0.11     0.00    -0.05    -0.49 
##   i8->h6   i9->h6  i10->h6  i11->h6  i12->h6  i13->h6  i14->h6  i15->h6 
##     0.32     0.68     0.11     0.00     0.46    -0.03    -0.21    -0.07 
##  i16->h6  i17->h6  i18->h6  i19->h6  i20->h6  i21->h6  i22->h6  i23->h6 
##     0.24     0.01    -0.29    -0.05     1.29    -1.21    -0.07     0.92 
##  i24->h6  i25->h6  i26->h6  i27->h6  i28->h6  i29->h6  i30->h6  i31->h6 
##    -0.87     0.00    -0.33     1.30     0.92     1.11     0.00    -1.10 
##  i32->h6  i33->h6  i34->h6  i35->h6  i36->h6  i37->h6  i38->h6  i39->h6 
##     0.00     0.46    -0.08     0.69     0.15     1.69     0.00    -0.60 
##  i40->h6  i41->h6  i42->h6  i43->h6  i44->h6  i45->h6  i46->h6  i47->h6 
##    -0.97     0.31     0.76    -0.53     0.49    -0.07     0.36     0.02 
##  i48->h6  i49->h6  i50->h6  i51->h6  i52->h6  i53->h6  i54->h6  i55->h6 
##    -0.77     0.24     0.01    -2.02     1.51    -0.01    -0.34     1.01 
##  i56->h6  i57->h6  i58->h6  i59->h6  i60->h6  i61->h6  i62->h6  i63->h6 
##     0.92     0.01    -0.34     0.27     0.01     1.13     2.77    -1.68 
##  i64->h6  i65->h6  i66->h6  i67->h6  i68->h6  i69->h6  i70->h6  i71->h6 
##    -0.01     0.00     0.51     0.16     0.00     0.10     0.00     0.00 
##  i72->h6  i73->h6  i74->h6  i75->h6  i76->h6  i77->h6  i78->h6  i79->h6 
##     0.00     0.00    -0.07     0.02     0.00     0.00     0.13    -0.01 
##  i80->h6  i81->h6  i82->h6  i83->h6  i84->h6  i85->h6  i86->h6  i87->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h6  i89->h6  i90->h6  i91->h6  i92->h6  i93->h6  i94->h6  i95->h6 
##     0.00    -0.07     0.00    -0.37     0.00     0.00     0.01     0.00 
##  i96->h6  i97->h6  i98->h6  i99->h6 i100->h6 i101->h6 i102->h6 i103->h6 
##     0.00     0.01     0.00     0.00    -0.23    -0.02     0.00     0.00 
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6 
##     0.00     0.00     1.43     0.00     0.00 
##    b->h7   i1->h7   i2->h7   i3->h7   i4->h7   i5->h7   i6->h7   i7->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h7   i9->h7  i10->h7  i11->h7  i12->h7  i13->h7  i14->h7  i15->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h7  i17->h7  i18->h7  i19->h7  i20->h7  i21->h7  i22->h7  i23->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h7  i25->h7  i26->h7  i27->h7  i28->h7  i29->h7  i30->h7  i31->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h7  i33->h7  i34->h7  i35->h7  i36->h7  i37->h7  i38->h7  i39->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h7  i41->h7  i42->h7  i43->h7  i44->h7  i45->h7  i46->h7  i47->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h7  i49->h7  i50->h7  i51->h7  i52->h7  i53->h7  i54->h7  i55->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h7  i57->h7  i58->h7  i59->h7  i60->h7  i61->h7  i62->h7  i63->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h7  i65->h7  i66->h7  i67->h7  i68->h7  i69->h7  i70->h7  i71->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h7  i73->h7  i74->h7  i75->h7  i76->h7  i77->h7  i78->h7  i79->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h7  i81->h7  i82->h7  i83->h7  i84->h7  i85->h7  i86->h7  i87->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h7  i89->h7  i90->h7  i91->h7  i92->h7  i93->h7  i94->h7  i95->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h7  i97->h7  i98->h7  i99->h7 i100->h7 i101->h7 i102->h7 i103->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o 
##  0.67  0.88  0.67  0.00  0.66  2.52 -4.53  0.66
# Variable-importance plot limited to 25 features (title typo fixed).
vip(adultNn1Fit, num_features = 25) + ggtitle("non-TDA-Assisted NN")

# Predict outcome using model from training data based on testing data
predictions <- predict(adultNn1Fit, newdata = adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data.
# No `positive` class is passed, so the reported positive class is ' <=50K';
# sensitivity, precision, recall and F1 below refer to that class.
nn1_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
nn1_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7388  1888
##      >50K      28   464
##                                           
##                Accuracy : 0.8038          
##                  95% CI : (0.7958, 0.8117)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.2651          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9962          
##             Specificity : 0.1973          
##          Pos Pred Value : 0.7965          
##          Neg Pred Value : 0.9431          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7563          
##    Detection Prevalence : 0.9496          
##       Balanced Accuracy : 0.5968          
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the overall statistics of the confusion matrix (accuracy, kappa,
# accuracy interval bounds, null accuracy and the two p-values).
nn1_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.038493e-01   2.650743e-01   7.958334e-01   8.116836e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   2.707074e-26   0.000000e+00
# Test-set overall accuracy as a named length-1 vector. Picked by name rather
# than by position so it does not depend on the order of `overall`.
nn1_cf_ov_acc <- nn1_cf$overall["Accuracy"]
# Print the per-class statistics (sensitivity, specificity, precision, ...).
nn1_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9962244            0.1972789            0.7964640 
##       Neg Pred Value            Precision               Recall 
##            0.9430894            0.7964640            0.9962244 
##                   F1           Prevalence       Detection Rate 
##            0.8852145            0.7592138            0.7563473 
## Detection Prevalence    Balanced Accuracy 
##            0.9496314            0.5967516
# Precision, recall and F1 for the positive class, selected by name instead of
# positions 5:7 so the result does not depend on the order of `byClass`.
nn1_cf_pre_rec_f1 <- nn1_cf$byClass[c("Precision", "Recall", "F1")]

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node1

# Neural Network 1: tune an nnet classifier for `adult_df1` on the node-1 data
# (`tda.m_adult_5.50.5.n1.vec`). `fitControl` and `nn1Grid` are defined outside
# this section.
# The former `Importance = T` argument was removed: neither caret::train() nor
# nnet::nnet() has such a parameter, so it was silently absorbed by `...` and
# had no effect on the fit.
# NOTE(review): many fits in the recorded log stop at nnet's 100-iteration
# limit instead of converging; consider passing `maxit` if that matters.
Adult_TDA_PC_5.50.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n1.vec,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 2836.907945 
## iter  10 value 518.577253
## iter  20 value 403.625430
## iter  30 value 403.044180
## iter  40 value 402.860008
## final  value 402.859989 
## converged
## # weights:  331
## initial  value 2539.186774 
## iter  10 value 405.303581
## iter  20 value 403.301552
## iter  30 value 402.542923
## iter  40 value 402.541618
## final  value 402.541507 
## converged
## # weights:  551
## initial  value 4712.689662 
## iter  10 value 403.473836
## iter  20 value 397.173548
## iter  30 value 395.472907
## iter  40 value 395.003181
## iter  50 value 394.950209
## iter  60 value 394.217672
## iter  70 value 393.924533
## iter  80 value 393.648611
## iter  90 value 393.520366
## final  value 393.519927 
## converged
## # weights:  771
## initial  value 2931.777566 
## iter  10 value 402.671812
## iter  20 value 402.066953
## iter  30 value 395.005035
## iter  40 value 394.994288
## iter  50 value 394.969819
## iter  60 value 394.777523
## iter  70 value 394.705355
## iter  80 value 394.701043
## iter  80 value 394.701040
## iter  80 value 394.701036
## final  value 394.701036 
## converged
## # weights:  221
## initial  value 1455.279669 
## iter  10 value 433.989726
## iter  20 value 404.791959
## iter  30 value 404.790217
## final  value 404.790209 
## converged
## # weights:  331
## initial  value 3201.278100 
## iter  10 value 407.979300
## iter  20 value 406.238653
## iter  30 value 403.007701
## iter  40 value 399.481792
## iter  50 value 398.147614
## iter  60 value 397.984646
## iter  70 value 397.981722
## iter  80 value 397.972888
## iter  90 value 397.969887
## iter  90 value 397.969885
## iter  90 value 397.969883
## final  value 397.969883 
## converged
## # weights:  551
## initial  value 1055.738991 
## iter  10 value 424.133975
## iter  20 value 403.285895
## iter  30 value 396.873742
## iter  40 value 395.261660
## iter  50 value 395.236942
## final  value 395.229171 
## converged
## # weights:  771
## initial  value 3248.226213 
## iter  10 value 466.258805
## iter  20 value 405.961583
## iter  30 value 401.652375
## iter  40 value 387.471252
## iter  50 value 370.396571
## iter  60 value 364.811836
## iter  70 value 322.882505
## iter  80 value 270.645948
## iter  90 value 266.268129
## iter 100 value 263.695663
## final  value 263.695663 
## stopped after 100 iterations
## # weights:  221
## initial  value 2966.230897 
## iter  10 value 408.565988
## iter  20 value 403.603799
## iter  30 value 398.868543
## iter  40 value 398.772568
## iter  50 value 398.730984
## iter  60 value 398.550217
## final  value 398.549188 
## converged
## # weights:  331
## initial  value 1613.762595 
## iter  10 value 408.606624
## iter  20 value 400.095618
## iter  30 value 398.647911
## iter  40 value 398.454091
## iter  50 value 397.262723
## iter  60 value 397.169852
## final  value 397.167410 
## converged
## # weights:  551
## initial  value 2174.968963 
## iter  10 value 633.720298
## iter  20 value 404.612355
## iter  30 value 402.623205
## iter  40 value 397.744602
## iter  50 value 397.705135
## iter  60 value 397.675471
## iter  70 value 396.501009
## iter  80 value 396.084661
## iter  90 value 396.082717
## final  value 396.082677 
## converged
## # weights:  771
## initial  value 566.958583 
## iter  10 value 407.754307
## iter  20 value 403.177084
## iter  30 value 403.048192
## iter  40 value 403.039149
## iter  50 value 402.995478
## iter  60 value 402.709998
## iter  70 value 397.747754
## iter  80 value 395.613532
## final  value 395.612123 
## converged
## # weights:  221
## initial  value 3356.511421 
## iter  10 value 404.689115
## iter  20 value 404.647264
## iter  30 value 394.040882
## iter  40 value 393.920409
## iter  50 value 393.227123
## iter  60 value 374.176660
## iter  70 value 339.811266
## iter  80 value 321.040164
## iter  90 value 274.329740
## iter 100 value 255.499087
## final  value 255.499087 
## stopped after 100 iterations
## # weights:  331
## initial  value 1706.736790 
## iter  10 value 406.872935
## iter  20 value 398.535879
## iter  30 value 393.592673
## iter  40 value 390.510024
## iter  50 value 387.405058
## iter  60 value 386.477134
## iter  70 value 386.469270
## iter  80 value 385.874675
## iter  90 value 382.478654
## iter 100 value 377.713869
## final  value 377.713869 
## stopped after 100 iterations
## # weights:  551
## initial  value 1134.810330 
## iter  10 value 420.210900
## iter  20 value 402.548055
## iter  30 value 402.537683
## iter  40 value 400.719326
## iter  50 value 399.990642
## iter  60 value 394.688728
## iter  70 value 390.073144
## iter  80 value 381.195611
## iter  90 value 375.793504
## iter 100 value 363.620097
## final  value 363.620097 
## stopped after 100 iterations
## # weights:  771
## initial  value 3766.253548 
## iter  10 value 402.780399
## iter  20 value 402.131264
## iter  30 value 399.966328
## iter  40 value 395.100500
## iter  50 value 390.733489
## iter  60 value 385.698727
## iter  70 value 385.366525
## iter  80 value 382.204517
## iter  90 value 379.865764
## iter 100 value 378.623991
## final  value 378.623991 
## stopped after 100 iterations
## # weights:  221
## initial  value 2423.032008 
## iter  10 value 412.544656
## iter  20 value 410.688403
## iter  30 value 388.431592
## iter  40 value 311.130345
## iter  50 value 280.771948
## iter  60 value 277.822360
## iter  70 value 272.977846
## iter  80 value 260.203148
## iter  90 value 257.582246
## iter 100 value 255.507047
## final  value 255.507047 
## stopped after 100 iterations
## # weights:  331
## initial  value 4013.634672 
## iter  10 value 407.979398
## iter  20 value 404.917367
## iter  30 value 401.736345
## iter  40 value 396.399543
## iter  50 value 394.002718
## iter  60 value 379.758636
## iter  70 value 283.702370
## iter  80 value 267.984108
## iter  90 value 260.938781
## iter 100 value 260.735785
## final  value 260.735785 
## stopped after 100 iterations
## # weights:  551
## initial  value 3930.893896 
## iter  10 value 591.908271
## iter  20 value 553.824710
## iter  30 value 513.427847
## iter  40 value 340.423225
## iter  50 value 290.630860
## iter  60 value 272.720562
## iter  70 value 260.690189
## iter  80 value 256.572845
## iter  90 value 255.416173
## iter 100 value 255.265274
## final  value 255.265274 
## stopped after 100 iterations
## # weights:  771
## initial  value 1254.322447 
## iter  10 value 451.018846
## iter  20 value 415.528655
## iter  30 value 402.120559
## iter  40 value 394.088544
## iter  50 value 393.818793
## iter  60 value 392.721256
## iter  70 value 390.407120
## iter  80 value 389.895820
## iter  90 value 389.797848
## iter 100 value 389.787031
## final  value 389.787031 
## stopped after 100 iterations
## # weights:  221
## initial  value 1392.425628 
## iter  10 value 411.483201
## iter  20 value 408.742637
## iter  30 value 407.945204
## iter  40 value 396.984244
## iter  50 value 391.058888
## iter  60 value 385.915573
## iter  70 value 379.012578
## iter  80 value 298.322581
## iter  90 value 277.878148
## iter 100 value 276.859115
## final  value 276.859115 
## stopped after 100 iterations
## # weights:  331
## initial  value 1382.859021 
## iter  10 value 407.370061
## iter  20 value 407.176565
## iter  30 value 395.899627
## iter  40 value 395.706723
## iter  50 value 395.700806
## iter  60 value 395.541353
## iter  70 value 395.470607
## iter  80 value 394.649707
## iter  90 value 390.892484
## iter 100 value 373.061883
## final  value 373.061883 
## stopped after 100 iterations
## # weights:  551
## initial  value 2149.580222 
## iter  10 value 417.058408
## iter  20 value 408.582786
## iter  30 value 406.106868
## iter  40 value 402.964728
## iter  50 value 390.833017
## iter  60 value 353.044941
## iter  70 value 295.642414
## iter  80 value 287.149724
## iter  90 value 277.517982
## iter 100 value 276.375759
## final  value 276.375759 
## stopped after 100 iterations
## # weights:  771
## initial  value 2693.848887 
## iter  10 value 410.775132
## iter  20 value 400.429601
## iter  30 value 395.116441
## iter  40 value 393.995829
## iter  50 value 393.371924
## iter  60 value 390.253023
## iter  70 value 384.261623
## iter  80 value 373.482905
## iter  90 value 329.328634
## iter 100 value 284.222265
## final  value 284.222265 
## stopped after 100 iterations
## # weights:  221
## initial  value 2841.927590 
## iter  10 value 408.190379
## iter  20 value 407.310835
## iter  30 value 407.260053
## iter  40 value 406.454889
## iter  50 value 406.447611
## final  value 406.447584 
## converged
## # weights:  331
## initial  value 2308.684339 
## iter  10 value 410.933839
## iter  20 value 399.975846
## iter  30 value 399.540579
## iter  40 value 399.383150
## iter  50 value 399.034432
## iter  60 value 398.996703
## final  value 398.996451 
## converged
## # weights:  551
## initial  value 749.565389 
## iter  10 value 401.975568
## iter  20 value 399.018956
## iter  30 value 398.477992
## iter  40 value 398.120247
## iter  50 value 397.796948
## iter  60 value 397.697198
## final  value 397.692517 
## converged
## # weights:  771
## initial  value 890.105510 
## iter  10 value 403.465952
## iter  20 value 398.161924
## iter  30 value 393.152954
## iter  40 value 383.242419
## iter  50 value 333.945381
## iter  60 value 331.077253
## iter  70 value 308.799322
## iter  80 value 284.545570
## iter  90 value 262.605228
## iter 100 value 257.735506
## final  value 257.735506 
## stopped after 100 iterations
## # weights:  221
## initial  value 3439.596740 
## iter  10 value 410.763435
## iter  20 value 403.121717
## final  value 401.682198 
## converged
## # weights:  331
## initial  value 1600.342938 
## iter  10 value 407.648564
## iter  20 value 398.613043
## iter  30 value 397.058136
## iter  40 value 395.789353
## iter  50 value 389.360849
## iter  60 value 364.217666
## iter  70 value 327.683252
## iter  80 value 316.508613
## iter  90 value 305.235095
## iter 100 value 272.994455
## final  value 272.994455 
## stopped after 100 iterations
## # weights:  551
## initial  value 1201.150165 
## iter  10 value 405.463221
## iter  20 value 400.001155
## iter  30 value 398.574829
## iter  40 value 398.034985
## iter  50 value 397.887576
## iter  60 value 397.813364
## iter  70 value 397.775824
## iter  80 value 396.312466
## iter  90 value 390.872495
## iter 100 value 386.625450
## final  value 386.625450 
## stopped after 100 iterations
## # weights:  771
## initial  value 3402.428895 
## iter  10 value 407.503560
## iter  20 value 405.495424
## iter  30 value 399.295854
## iter  40 value 397.962241
## iter  50 value 397.409165
## iter  60 value 392.257880
## iter  70 value 389.429798
## iter  80 value 386.442411
## iter  90 value 368.839667
## iter 100 value 360.149440
## final  value 360.149440 
## stopped after 100 iterations
## # weights:  221
## initial  value 2951.195531 
## iter  10 value 420.660571
## iter  20 value 419.952911
## iter  30 value 414.570170
## iter  40 value 399.078938
## iter  50 value 388.716985
## iter  60 value 363.208753
## iter  70 value 310.747471
## iter  80 value 290.283893
## iter  90 value 283.416502
## iter 100 value 282.125804
## final  value 282.125804 
## stopped after 100 iterations
## # weights:  331
## initial  value 2621.010713 
## iter  10 value 420.365203
## iter  20 value 410.215185
## iter  30 value 400.435176
## iter  40 value 395.693200
## iter  50 value 392.675972
## iter  60 value 382.449067
## iter  70 value 323.143153
## iter  80 value 307.805747
## iter  90 value 294.083212
## iter 100 value 286.879003
## final  value 286.879003 
## stopped after 100 iterations
## # weights:  551
## initial  value 1289.204100 
## iter  10 value 416.159932
## iter  20 value 413.539686
## iter  30 value 407.629227
## iter  40 value 407.356715
## iter  50 value 406.084117
## iter  60 value 399.956908
## iter  70 value 388.512638
## iter  80 value 334.575048
## iter  90 value 310.447315
## iter 100 value 306.738634
## final  value 306.738634 
## stopped after 100 iterations
## # weights:  771
## initial  value 3498.777832 
## iter  10 value 432.466094
## iter  20 value 424.403960
## iter  30 value 404.384914
## iter  40 value 400.376202
## iter  50 value 399.248312
## iter  60 value 395.211162
## iter  70 value 393.532939
## iter  80 value 387.495041
## iter  90 value 383.189974
## iter 100 value 381.227176
## final  value 381.227176 
## stopped after 100 iterations
## # weights:  221
## initial  value 4197.036092 
## iter  10 value 607.377587
## iter  20 value 607.359193
## iter  30 value 605.018445
## iter  40 value 594.296585
## iter  50 value 564.919586
## iter  60 value 534.098525
## iter  70 value 427.187659
## iter  80 value 396.090599
## iter  90 value 394.696603
## iter 100 value 394.479929
## final  value 394.479929 
## stopped after 100 iterations
# Print the caret tuning summary: resampled accuracy and kappa for every
# size/decay combination, plus the combination that was selected.
Adult_TDA_PC_5.50.5_n1_NN1Fit0
## Neural Network 
## 
## 4917 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 3278, 3278, 3278 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa       
##   2     0.3    0.9733577   0.068380213
##   2     0.5    0.9741712   0.054307116
##   2     0.7    0.9733577   0.000000000
##   3     0.3    0.9733577   0.000000000
##   3     0.5    0.9731544   0.027224806
##   3     0.7    0.9733577   0.000000000
##   5     0.3    0.9733577   0.000000000
##   5     0.5    0.9741712   0.054307116
##   5     0.7    0.9733577   0.000000000
##   7     0.3    0.9733577   0.000000000
##   7     0.5    0.9727476  -0.001146296
##   7     0.7    0.9733577   0.000000000
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.5.
# Per-fold resampling results of the selected model.
Adult_TDA_PC_5.50.5_n1_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.9737645 0.0000000    Fold3
## 2 0.9755949 0.1629213    Fold2
## 3 0.9731544 0.0000000    Fold1
# Keep only the per-fold accuracies (a one-column data frame) for the
# fold-wise comparison further down.
ad_tda_pc_5.50.5_n1_nn1_fit_re <-
  Adult_TDA_PC_5.50.5_n1_NN1Fit0$resample["Accuracy"]

# Print the structure and weights of the final network.
summary(Adult_TDA_PC_5.50.5_n1_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting  decay=0.5
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00    -0.03     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.09     0.00     0.01     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     2.33    -0.06     0.82     0.35     0.42     0.00     1.56    -0.81 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##    -0.69     0.68     0.00     0.00     0.17     0.30     0.24     0.01 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.14     0.73     0.25    -0.08     0.39    -0.19    -1.38     1.30 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -0.47     0.00    -0.55     1.47    -0.08     0.35     0.24     1.74 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.82     0.65     0.01    -0.40 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##    -0.21    -0.28     0.07     0.38     0.18     0.00    -0.25     0.53 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.70     0.48    -0.34     0.75     0.41     0.14     0.00     0.06 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.96     0.31     0.42     0.93     0.27     0.39     0.96     1.37 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00    -0.05     0.59     0.00     0.20    -0.61     0.19 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.50     0.00     0.00     0.13     0.43     0.19     0.47     0.13 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.01     0.02    -0.64    -0.77 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.02     0.24     0.00     0.06     0.01     0.32     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.14     0.04     0.06     0.06     0.17    -0.01     0.62 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.04     0.00    -0.32     0.00     0.01 
##  b->o h1->o h2->o 
##  0.28 -0.17  6.62
# Variable-importance plot of the 25 most important predictors of the node-1
# network. The title typo "Assited" is corrected to "Assisted".
vip(Adult_TDA_PC_5.50.5_n1_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n1_NN1Fit TDA-Assisted NN")

# Score the test data with the node-1 network.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n1_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predicted classes against the true test labels.
# NOTE(review): in the recorded output every test row is predicted as " >50K"
# (accuracy 0.24) although the cross-validated accuracy on the node data was
# about 0.97; verify that the test features match the node training data.
ad_tda_pc_5.50.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n1_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion-matrix object a second time; the output is
# identical to the print directly above.
ad_tda_pc_5.50.5_n1_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-1 confusion matrix.
ad_tda_pc_5.50.5_n1_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.2407862      0.0000000      0.2323343      0.2493929      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# Keep the test-set accuracy (first entry of `overall`) for the comparison
# with the other network's test accuracy.
ad_tda_pc_5.50.5_n1_nn1_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n1_nn1_cf0$overall["Accuracy"]
# Per-class statistics for the positive class.
ad_tda_pc_5.50.5_n1_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.0000000            1.0000000                  NaN 
##       Neg Pred Value            Precision               Recall 
##            0.2407862                   NA            0.0000000 
##                   F1           Prevalence       Detection Rate 
##                   NA            0.7592138            0.0000000 
## Detection Prevalence    Balanced Accuracy 
##            0.0000000            0.5000000
# Keep precision, recall and F1 (entries 5 to 7 of `byClass`), selected by name.
# In the recorded run precision and F1 are NA because no row was predicted as
# the positive class.
ad_tda_pc_5.50.5_n1_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n1_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Per-fold accuracy difference: `ad_nn1_fit_re` minus the node-1 TDA fit.
# Negative values mean the node-1 TDA fit scored higher on that row.
# NOTE(review): rows are paired by position, not by fold label; presumably
# `ad_nn1_fit_re` holds the non-TDA network's per-fold accuracies in the same
# fold order - confirm.
diff_tda_pca_5.50.5_nn1_n1_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.50.5_n1_nn1_fit_re
diff_tda_pca_5.50.5_nn1_n1_3_fold
##     Accuracy
## 1 -0.1451254
## 2 -0.1293398
## 3 -0.1662315
## Bayesian Tests 3-fold diff

# Bayesian sign test on the per-fold differences; -0.01 and 0.01 are passed as
# the lower and upper bounds of the "rope" interval reported in the result.
bst_tda_pca_5.50.5_nn1.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# NOTE(review): probRight is 0 in the recorded run, so the ratio is Inf.
bst_tda_pca_5.50.5_nn1.n1_3_fold_odds.left <-
  bst_tda_pca_5.50.5_nn1.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_nn1.n1_3_fold[["probRight"]]
bst_tda_pca_5.50.5_nn1.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the per-fold differences, using the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.50.5_nn1.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n1_3_fold
## $winLeft
## [1] 0.9912333
## 
## $winRope
## [1] 0.008766667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the per-fold differences, with -0.01 / 0.01 as
# the rope bounds.
# The second argument is the correlation between cross-validation results; the
# reference method sets it to 1 / (number of folds). The fits compared here use
# 3-fold CV, so rho is 1/3 (it was 0.1, the value for 10-fold CV). The recorded
# output that follows was produced with the old value.
# NOTE(review): assumes correlatedBayesianTtest has the
# (diff, rho, rope_min, rope_max) signature of the BayesianTestsML reference
# implementation - confirm against its definition.
bct_tda_pca_5.50.5_nn1.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n1_3_fold
## $left
## [1] 0.9959864
## 
## $rope
## [1] 0.0009492257
## 
## $right
## [1] 0.003064356
# Plot the share of the per-fold differences that falls inside [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.50.5_nn1_n1_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold))
#bf_tda_pca_5.50.5_nn1.n1_3_fold

# Two-sided one-sample t-test of the per-fold differences against a mean of 0
# (three values, hence 2 degrees of freedom in the recorded output).
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold), mu = 0)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold)
## t = -13.746, df = 2, p-value = 0.005251
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1928795 -0.1009183
## sample estimates:
##  mean of x 
## -0.1468989
### Test set diff
# Test-set accuracy difference: `nn1_cf_ov_acc` minus the node-1 TDA fit.
# A positive value means the node-1 TDA fit was less accurate on the test set.
diff_tda_pca_5.50.5_nn1.n1_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.50.5_n1_nn1_cf0_ov_acc
diff_tda_pca_5.50.5_nn1.n1_test
##  Accuracy 
## 0.5630631
## Bayesian Tests Test set diff

# Bayesian sign test on the single test-set difference, with -0.01 / 0.01 as
# the rope bounds.
# NOTE(review): the input is one value only, so the result rests on a single
# observation.
bst_tda_pca_5.50.5_nn1.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the test-set Bayesian sign test.
bst_tda_pca_5.50.5_nn1.n1_test_odds.left <-
  bst_tda_pca_5.50.5_nn1.n1_test[["probLeft"]] /
  bst_tda_pca_5.50.5_nn1.n1_test[["probRight"]]
bst_tda_pca_5.50.5_nn1.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference, with
# -0.01 / 0.01 as the rope bounds.
bsr_tda_pca_5.50.5_nn1.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1593667
## 
## $winRight
## [1] 0.8406333
# Bayesian correlated t-test on the single test-set difference.
# NOTE(review): with only one value the recorded result is NA for left, rope
# and right, so this call yields no usable probabilities.
bct_tda_pca_5.50.5_nn1.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nn1.n1_test)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n1_test)) #bf_tda_pca_5.50.5_nn1.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n1_test))

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node2

##Adult_TDA_PC_5.50.5_n2_NN1Fit0 <- nnet(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n2.vec, size=2, range = 0.6,, type='class')

# Neural Network 1: tune an nnet classifier for `adult_df1` on the node-2 data
# (`tda.m_adult_5.50.5.n2.vec`). `fitControl` and `nn1Grid` are defined outside
# this section.
# The former `Importance = T` argument was removed: neither caret::train() nor
# nnet::nnet() has such a parameter, so it was silently absorbed by `...` and
# had no effect on the fit.
# NOTE(review): the fits in the recorded log mostly stop at nnet's
# 100-iteration limit; consider passing `maxit` if convergence matters.
Adult_TDA_PC_5.50.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 8501.489925 
## iter  10 value 5602.512076
## iter  20 value 5479.563941
## iter  30 value 5242.987078
## iter  40 value 5224.069749
## iter  50 value 5197.250019
## iter  60 value 5182.016418
## iter  70 value 5154.983647
## iter  80 value 5106.521450
## iter  90 value 4995.575971
## iter 100 value 4719.347351
## final  value 4719.347351 
## stopped after 100 iterations
## # weights:  331
## initial  value 5674.318127 
## iter  10 value 5606.335713
## iter  20 value 5541.955405
## iter  30 value 5393.046334
## iter  40 value 5236.529370
## iter  50 value 5181.658157
## iter  60 value 5147.881376
## iter  70 value 4899.259728
## iter  80 value 4559.931261
## iter  90 value 4463.006167
## iter 100 value 4397.988800
## final  value 4397.988800 
## stopped after 100 iterations
## # weights:  551
## initial  value 6148.593218 
## iter  10 value 5367.581876
## iter  20 value 5301.948740
## iter  30 value 5239.090475
## iter  40 value 5219.942302
## iter  50 value 5190.334687
## iter  60 value 5165.238147
## iter  70 value 5145.292867
## iter  80 value 5124.977846
## iter  90 value 5094.394583
## iter 100 value 4939.138367
## final  value 4939.138367 
## stopped after 100 iterations
## # weights:  771
## initial  value 7630.352146 
## iter  10 value 5566.475071
## iter  20 value 5348.243462
## iter  30 value 5338.628861
## iter  40 value 5333.505802
## iter  50 value 5327.435567
## iter  60 value 5318.156729
## iter  70 value 5289.030076
## iter  80 value 5250.152031
## iter  90 value 5188.217635
## iter 100 value 4967.599152
## final  value 4967.599152 
## stopped after 100 iterations
## # weights:  221
## initial  value 5688.532556 
## iter  10 value 5603.851503
## iter  20 value 5587.764128
## iter  30 value 5586.467014
## iter  40 value 5586.229355
## iter  50 value 5476.331379
## iter  60 value 5341.794144
## iter  70 value 5319.639780
## iter  80 value 5317.164490
## iter  90 value 5317.067232
## iter 100 value 5317.052079
## final  value 5317.052079 
## stopped after 100 iterations
## # weights:  331
## initial  value 5924.103932 
## iter  10 value 5564.082832
## iter  20 value 5549.278100
## iter  30 value 5352.254079
## iter  40 value 5316.628910
## iter  50 value 5264.582555
## iter  60 value 5236.127171
## iter  70 value 5213.616614
## iter  80 value 5205.933433
## iter  90 value 5163.496211
## iter 100 value 5122.296066
## final  value 5122.296066 
## stopped after 100 iterations
## # weights:  551
## initial  value 5682.784565 
## iter  10 value 5590.139938
## iter  20 value 5487.890996
## iter  30 value 5461.644350
## iter  40 value 5383.205058
## iter  50 value 5263.092977
## iter  60 value 4784.923006
## iter  70 value 4574.033354
## iter  80 value 4453.232250
## iter  90 value 4445.492375
## iter 100 value 4428.645044
## final  value 4428.645044 
## stopped after 100 iterations
## # weights:  771
## initial  value 5678.307466 
## iter  10 value 5563.283208
## iter  20 value 5320.774287
## iter  30 value 5312.164702
## iter  40 value 5295.506320
## iter  50 value 5289.275912
## iter  60 value 5280.127461
## iter  70 value 5273.647246
## iter  80 value 5263.023222
## iter  90 value 5239.110643
## iter 100 value 5157.602774
## final  value 5157.602774 
## stopped after 100 iterations
## # weights:  221
## initial  value 5635.539123 
## iter  10 value 5591.803146
## iter  20 value 5533.626598
## iter  30 value 5533.604290
## iter  40 value 5282.219255
## iter  50 value 5257.652555
## iter  60 value 5204.959304
## iter  70 value 5193.228398
## iter  80 value 5136.200003
## iter  90 value 4947.496546
## iter 100 value 4657.075371
## final  value 4657.075371 
## stopped after 100 iterations
## # weights:  331
## initial  value 6791.111388 
## iter  10 value 5381.993847
## iter  20 value 5284.613384
## iter  30 value 5203.971963
## iter  40 value 5172.626133
## iter  50 value 5076.857923
## iter  60 value 4779.158131
## iter  70 value 4655.033896
## iter  80 value 4635.293455
## iter  90 value 4467.745351
## iter 100 value 4393.977069
## final  value 4393.977069 
## stopped after 100 iterations
## # weights:  551
## initial  value 6805.592754 
## iter  10 value 5454.440697
## iter  20 value 5340.105906
## iter  30 value 5332.934276
## iter  40 value 5271.119800
## iter  50 value 5265.824221
## iter  60 value 5263.743045
## iter  70 value 5259.949051
## iter  80 value 5253.410759
## final  value 5252.717326 
## converged
## # weights:  771
## initial  value 5923.382873 
## iter  10 value 5359.866895
## iter  20 value 5283.407851
## iter  30 value 5249.501355
## iter  40 value 5219.752243
## iter  50 value 5185.610500
## iter  60 value 5143.071181
## iter  70 value 5118.664645
## iter  80 value 5104.569192
## iter  90 value 5100.524964
## iter 100 value 5089.128876
## final  value 5089.128876 
## stopped after 100 iterations
## # weights:  221
## initial  value 6476.846192 
## iter  10 value 5384.328284
## iter  20 value 5340.451463
## iter  30 value 5196.106782
## iter  40 value 5179.814870
## iter  50 value 5157.067101
## iter  60 value 5141.963942
## iter  70 value 5135.948469
## iter  80 value 5127.193365
## iter  90 value 5076.104933
## iter 100 value 5008.686911
## final  value 5008.686911 
## stopped after 100 iterations
## # weights:  331
## initial  value 5635.852909 
## iter  10 value 5482.846950
## iter  20 value 5451.801758
## iter  30 value 5404.050079
## iter  40 value 5342.856386
## iter  50 value 5218.146743
## iter  60 value 5120.473739
## iter  70 value 4747.373284
## iter  80 value 4609.670456
## iter  90 value 4434.959304
## iter 100 value 4337.784229
## final  value 4337.784229 
## stopped after 100 iterations
## # weights:  551
## initial  value 5692.181753 
## iter  10 value 5390.494890
## iter  20 value 5370.194507
## iter  30 value 5346.748222
## iter  40 value 5333.762368
## iter  50 value 5322.913535
## iter  60 value 5215.907106
## iter  70 value 5185.954248
## iter  80 value 5174.562913
## iter  90 value 5166.894689
## iter 100 value 5152.796790
## final  value 5152.796790 
## stopped after 100 iterations
## # weights:  771
## initial  value 5683.084985 
## iter  10 value 5608.586977
## iter  20 value 5606.497741
## iter  30 value 5354.192673
## iter  40 value 5351.495641
## iter  50 value 5351.460618
## iter  60 value 5350.751976
## iter  70 value 5340.737616
## iter  80 value 5312.617355
## iter  90 value 5155.574615
## iter 100 value 5110.736246
## final  value 5110.736246 
## stopped after 100 iterations
## # weights:  221
## initial  value 5751.862863 
## iter  10 value 5538.622789
## iter  20 value 5356.702283
## iter  30 value 5350.287860
## iter  40 value 5338.920565
## iter  50 value 5333.962313
## iter  60 value 5333.331168
## iter  70 value 5315.918581
## iter  80 value 5227.564705
## iter  90 value 5184.072379
## iter 100 value 5108.277229
## final  value 5108.277229 
## stopped after 100 iterations
## # weights:  331
## initial  value 5777.695921 
## iter  10 value 5392.053482
## iter  20 value 5226.299885
## iter  30 value 5217.471809
## iter  40 value 5209.822838
## iter  50 value 5181.393693
## iter  60 value 5174.852310
## iter  70 value 5161.859776
## iter  80 value 5153.347679
## iter  90 value 5147.818682
## iter 100 value 5136.653708
## final  value 5136.653708 
## stopped after 100 iterations
## # weights:  551
## initial  value 5621.403853 
## iter  10 value 5438.756421
## iter  20 value 5272.625547
## iter  30 value 5262.158460
## iter  40 value 5221.067169
## iter  50 value 5175.806839
## iter  60 value 5136.086176
## iter  70 value 4916.776165
## iter  80 value 4735.000216
## iter  90 value 4645.270568
## iter 100 value 4552.562131
## final  value 4552.562131 
## stopped after 100 iterations
## # weights:  771
## initial  value 6808.139907 
## iter  10 value 5603.958554
## iter  20 value 5349.796568
## iter  30 value 5227.585991
## iter  40 value 5212.622023
## iter  50 value 5167.009083
## iter  60 value 5126.643094
## iter  70 value 5123.562355
## iter  80 value 5115.845076
## iter  90 value 5111.715972
## iter 100 value 5108.420798
## final  value 5108.420798 
## stopped after 100 iterations
## # weights:  221
## initial  value 5727.481271 
## iter  10 value 5486.641967
## iter  20 value 5380.007854
## iter  30 value 5351.967940
## iter  40 value 5344.417360
## iter  50 value 5337.963105
## iter  60 value 5325.910175
## iter  70 value 5263.441330
## iter  80 value 4940.044017
## iter  90 value 4624.173305
## iter 100 value 4501.426782
## final  value 4501.426782 
## stopped after 100 iterations
## # weights:  331
## initial  value 5658.123695 
## iter  10 value 5575.143020
## iter  20 value 5330.767170
## iter  30 value 5289.030212
## iter  40 value 5279.485170
## iter  50 value 5266.697315
## iter  60 value 5257.519032
## iter  70 value 5199.404764
## iter  80 value 5176.869658
## iter  90 value 5168.955273
## iter 100 value 5163.035956
## final  value 5163.035956 
## stopped after 100 iterations
## # weights:  551
## initial  value 6480.357122 
## iter  10 value 5384.060742
## iter  20 value 5290.723163
## iter  30 value 5264.752408
## iter  40 value 5214.533502
## iter  50 value 5171.361576
## iter  60 value 5149.678894
## iter  70 value 5113.912907
## iter  80 value 5101.028024
## iter  90 value 5092.209698
## iter 100 value 5028.116751
## final  value 5028.116751 
## stopped after 100 iterations
## # weights:  771
## initial  value 7215.041980 
## iter  10 value 5612.445716
## iter  20 value 5545.959869
## iter  30 value 5337.472064
## iter  40 value 5318.278993
## iter  50 value 5311.504748
## iter  60 value 5299.014522
## iter  70 value 5223.165705
## iter  80 value 5216.621155
## iter  90 value 5177.537643
## iter 100 value 5107.760574
## final  value 5107.760574 
## stopped after 100 iterations
## # weights:  221
## initial  value 6919.487025 
## iter  10 value 5586.628261
## iter  20 value 5274.810689
## iter  30 value 5069.851829
## iter  40 value 4875.523664
## iter  50 value 4632.917259
## iter  60 value 4583.310314
## iter  70 value 4444.477266
## iter  80 value 4368.095634
## iter  90 value 4312.518980
## iter 100 value 4280.243676
## final  value 4280.243676 
## stopped after 100 iterations
## # weights:  331
## initial  value 6108.023993 
## iter  10 value 5425.942831
## iter  20 value 5339.370659
## iter  30 value 5336.241449
## iter  40 value 5331.598715
## iter  50 value 5323.451570
## iter  60 value 5319.572255
## iter  70 value 5318.958319
## iter  80 value 5306.194456
## iter  90 value 5223.854593
## iter 100 value 5160.880142
## final  value 5160.880142 
## stopped after 100 iterations
## # weights:  551
## initial  value 5652.322395 
## iter  10 value 5493.042972
## iter  20 value 5355.461727
## iter  30 value 5250.919931
## iter  40 value 5198.974734
## iter  50 value 5172.547671
## iter  60 value 5161.393190
## iter  70 value 5122.575687
## iter  80 value 4889.705205
## iter  90 value 4635.675146
## iter 100 value 4489.481150
## final  value 4489.481150 
## stopped after 100 iterations
## # weights:  771
## initial  value 5860.548042 
## iter  10 value 5537.338574
## iter  20 value 5370.058976
## iter  30 value 5297.878408
## iter  40 value 5223.657686
## iter  50 value 5187.272019
## iter  60 value 5164.584213
## iter  70 value 4811.661745
## iter  80 value 4675.543024
## iter  90 value 4575.921934
## iter 100 value 4515.676304
## final  value 4515.676304 
## stopped after 100 iterations
## # weights:  221
## initial  value 5651.731634 
## iter  10 value 5608.235414
## iter  10 value 5608.235410
## iter  10 value 5608.235408
## final  value 5608.235408 
## converged
## # weights:  331
## initial  value 5695.125048 
## iter  10 value 5575.871931
## iter  20 value 5574.114115
## iter  30 value 5570.837039
## iter  40 value 5569.530523
## iter  50 value 5431.361527
## iter  60 value 5345.820210
## iter  70 value 5342.799769
## iter  80 value 5340.003955
## iter  90 value 5328.830207
## iter 100 value 5238.856658
## final  value 5238.856658 
## stopped after 100 iterations
## # weights:  551
## initial  value 6561.083244 
## iter  10 value 5611.774392
## iter  20 value 5346.302182
## iter  30 value 5336.068878
## iter  40 value 5333.848430
## iter  50 value 5318.823588
## iter  60 value 5309.810633
## iter  70 value 5290.808997
## iter  80 value 5278.920705
## iter  90 value 5182.860132
## iter 100 value 4943.133823
## final  value 4943.133823 
## stopped after 100 iterations
## # weights:  771
## initial  value 5682.464927 
## iter  10 value 5496.623693
## iter  20 value 5374.316492
## iter  30 value 5219.683456
## iter  40 value 5211.331569
## iter  50 value 5179.679189
## iter  60 value 5170.725766
## iter  70 value 5160.559853
## iter  80 value 5147.665777
## iter  90 value 5071.454125
## iter 100 value 5047.611715
## final  value 5047.611715 
## stopped after 100 iterations
## # weights:  221
## initial  value 5831.880290 
## iter  10 value 5380.277809
## iter  20 value 5317.874703
## iter  30 value 5198.232111
## iter  40 value 5128.522143
## iter  50 value 4977.813840
## iter  60 value 4774.380041
## iter  70 value 4587.656551
## iter  80 value 4480.854345
## iter  90 value 4390.998143
## iter 100 value 4327.113953
## final  value 4327.113953 
## stopped after 100 iterations
## # weights:  331
## initial  value 5717.759937 
## iter  10 value 5607.732799
## final  value 5607.526605 
## converged
## # weights:  551
## initial  value 5887.820435 
## iter  10 value 5517.994112
## iter  20 value 5289.746428
## iter  30 value 5242.019551
## iter  40 value 5141.592401
## iter  50 value 4752.297362
## iter  60 value 4604.780857
## iter  70 value 4432.408981
## iter  80 value 4396.898190
## iter  90 value 4359.971549
## iter 100 value 4344.451765
## final  value 4344.451765 
## stopped after 100 iterations
## # weights:  771
## initial  value 6085.347488 
## iter  10 value 5593.712643
## iter  20 value 5352.628883
## iter  30 value 5345.612490
## iter  40 value 5342.245125
## iter  50 value 5340.376507
## iter  60 value 5334.729747
## iter  70 value 5333.636490
## iter  80 value 5333.020678
## iter  90 value 5332.805521
## iter 100 value 5267.104587
## final  value 5267.104587 
## stopped after 100 iterations
## # weights:  221
## initial  value 8447.227381 
## iter  10 value 8021.852433
## iter  20 value 8003.153719
## iter  30 value 8000.769078
## iter  40 value 7994.649780
## iter  50 value 7958.350979
## iter  60 value 7898.325809
## iter  70 value 7753.928654
## iter  80 value 7592.614818
## iter  90 value 7032.877260
## iter 100 value 6735.255207
## final  value 6735.255207 
## stopped after 100 iterations
# Show the cross-validated tuning summary of the `n2` neural-network fit.
print(Adult_TDA_PC_5.50.5_n2_NN1Fit0)
## Neural Network 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8137, 8137, 8138 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa     
##   2     0.3    0.6839289  0.37716216
##   2     0.5    0.5560371  0.03764186
##   2     0.7    0.7143212  0.43025567
##   3     0.3    0.6735136  0.36276157
##   3     0.5    0.5652119  0.15363819
##   3     0.7    0.6114971  0.20945174
##   5     0.3    0.6137229  0.21234778
##   5     0.5    0.6994074  0.39635213
##   5     0.7    0.6370702  0.28692174
##   7     0.3    0.6772110  0.34572762
##   7     0.5    0.5956091  0.21243700
##   7     0.7    0.5878243  0.20003391
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.7.
# Per-fold resampling metrics stored on the fitted model.
Adult_TDA_PC_5.50.5_n2_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.7192724 0.4370092    Fold3
## 2 0.7151634 0.4368938    Fold2
## 3 0.7085279 0.4168640    Fold1
# Keep the fold accuracies as a one-column data frame. The column is selected
# by name so the result does not depend on the column order of `$resample`.
ad_tda_pc_5.50.5_n2_nn1_fit_re <-
  Adult_TDA_PC_5.50.5_n2_NN1Fit0$resample["Accuracy"]

# Print the fitted network's layout and weights.
summary(Adult_TDA_PC_5.50.5_n2_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting  decay=0.7
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.60     0.00     0.19    -0.31     1.20     0.00     0.26    -0.09 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.05    -0.74     0.03     0.00    -0.54     0.38     0.87    -0.62 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##    -1.62    -0.10    -0.17     0.73     2.34    -0.14    -3.31     2.49 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##    -0.95     0.00    -0.12     1.34    -0.39    -1.09    -0.20     1.57 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##    -0.13     1.43    -0.25    -0.73     0.19     0.63    -0.05     0.07 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##    -0.86     1.78     0.71    -0.07     0.67     0.03    -1.14    -0.14 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##    -0.41    -0.42    -0.40     3.57     0.24    -0.37    -0.61    -0.91 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##    -1.31     0.24    -0.06     0.08     0.15     0.19    -1.34     1.93 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00    -0.03     0.29    -0.13    -0.49     0.41     0.54 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##    -0.05     0.22     0.12     0.17    -0.57    -0.05    -0.66     0.38 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.08     0.48     0.00    -0.01    -0.34    -0.05     0.48     0.57 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##    -0.18    -0.61     0.42     0.14    -0.05     0.21     0.58     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##    -0.06    -0.89    -0.19     0.00    -0.07    -0.28     0.46    -0.47 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##    -0.06    -0.08    -0.63     0.94     0.03 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##    -0.28     0.07     0.26     1.06     0.58     0.00     1.19     0.39 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##    -2.88    -0.85    -0.02     0.00    -1.21    -1.70    -0.26     0.21 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.97     1.20    -1.54     0.83     0.45     1.38    -1.08    -0.36 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.06     0.00    -0.10     0.87    -0.46    -0.12     0.15    -0.59 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##    -0.03     0.84     0.21    -0.74     0.26     1.47    -0.07    -0.48 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     1.16    -3.41    -0.27     0.01     0.28     0.05    -0.09     0.99 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.26     0.85    -1.30    -0.73     0.04    -0.19    -0.10     0.01 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.69    -0.11    -0.22     0.80    -0.15    -0.61     0.84    -1.13 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.01     0.08    -0.25     0.07     0.14    -0.08    -0.27 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.49    -0.19    -0.13     0.07     0.20     0.14    -0.35     0.03 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00    -0.06     0.00     0.00     0.04    -0.11    -0.56     0.15 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.05     0.05    -0.09     0.00     0.02     0.23    -0.23    -0.01 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.01     0.34    -0.20     0.03    -0.08     0.07    -0.06    -0.07 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.09    -0.15     0.20     0.17 
##  b->o h1->o h2->o 
##  1.46 -3.05  1.48
# Variable-importance plot limited to 25 predictors of the fitted network.
# The feature count is passed by name, and the title spelling is corrected
# ("Assited" -> "Assisted").
vip(Adult_TDA_PC_5.50.5_n2_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n2_NN1Fit TDA-Assisted NN")

# Score the test data with the fitted `n2` network.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n2_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed labels of the test data.
# NOTE(review): the accuracy echoed below (0.3856) is under the no-information
# rate (0.7592) with Kappa ~ 0 -- verify that `adult.one_hot_df4Test` is
# encoded the same way as the data this model was trained on.
ad_tda_pc_5.50.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.50.5_n2_nn1_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   2071   656
##      >50K    5345  1696
##                                          
##                Accuracy : 0.3856         
##                  95% CI : (0.376, 0.3954)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 2e-04          
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.2793         
##             Specificity : 0.7211         
##          Pos Pred Value : 0.7594         
##          Neg Pred Value : 0.2409         
##              Prevalence : 0.7592         
##          Detection Rate : 0.2120         
##    Detection Prevalence : 0.2792         
##       Balanced Accuracy : 0.5002         
##                                          
##        'Positive' Class :  <=50K         
## 
# Second print of the same confusion matrix (output is identical to the first).
print(ad_tda_pc_5.50.5_n2_nn1_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   2071   656
##      >50K    5345  1696
##                                          
##                Accuracy : 0.3856         
##                  95% CI : (0.376, 0.3954)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 2e-04          
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.2793         
##             Specificity : 0.7211         
##          Pos Pred Value : 0.7594         
##          Neg Pred Value : 0.2409         
##              Prevalence : 0.7592         
##          Detection Rate : 0.2120         
##    Detection Prevalence : 0.2792         
##       Balanced Accuracy : 0.5002         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall statistics of the test-set confusion matrix.
ad_tda_pc_5.50.5_n2_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   0.3856470106   0.0002079483   0.3759784054   0.3953838481   0.7592137592 
## AccuracyPValue  McnemarPValue 
##   1.0000000000   0.0000000000
# Keep the test accuracy (a named length-1 vector). It is selected by name
# instead of by position so a change in element order cannot pick another
# statistic.
ad_tda_pc_5.50.5_n2_nn1_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n2_nn1_cf0$overall["Accuracy"]
# Per-class statistics of the same confusion matrix.
ad_tda_pc_5.50.5_n2_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.2792611            0.7210884            0.7594426 
##       Neg Pred Value            Precision               Recall 
##            0.2408749            0.7594426            0.2792611 
##                   F1           Prevalence       Detection Rate 
##            0.4083604            0.7592138            0.2120188 
## Detection Prevalence    Balanced Accuracy 
##            0.2791769            0.5001747
# Keep precision, recall and F1 (elements 5 to 7 above), selected by name.
ad_tda_pc_5.50.5_n2_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n2_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted NN vs. TDA-assisted NN classifiers

### 3-fold diff

# Fold-wise accuracy gap: `ad_nn1_fit_re` minus the fold accuracies of the
# `n2` TDA-assisted fit (positive values favour `ad_nn1_fit_re`).
diff_tda_pca_5.50.5_nn1_n2_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.50.5_n2_nn1_fit_re
print(diff_tda_pca_5.50.5_nn1_n2_3_fold)
##     Accuracy
## 1 0.10936675
## 2 0.13109167
## 3 0.09839498
## Bayesian Tests 3-fold diff

# Bayesian Sign Test on the fold-wise differences; -0.01 and 0.01 are the
# bounds passed to the test (the same interval is used for the ROPE plot below).
bst_tda_pca_5.50.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold), -0.01, 0.01
)
print(bst_tda_pca_5.50.5_nn1.n2_3_fold)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability

bst_tda_pca_5.50.5_nn1.n2_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_nn1.n2_3_fold,
  probLeft / probRight
)
print(bst_tda_pca_5.50.5_nn1.n2_3_fold_odds.left)
## [1] 0
# Bayesian Signed Rank Test on the same differences and bounds

bsr_tda_pca_5.50.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold), -0.01, 0.01
)
print(bsr_tda_pca_5.50.5_nn1.n2_3_fold)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0089
## 
## $winRight
## [1] 0.9911
# Bayesian Correlated Test
# NOTE(review): 0.1 is passed as the second argument; if this is the
# fold-correlation term (commonly 1 / number of folds), 3-fold resampling
# would suggest 1/3 -- confirm against the helper's definition.

bct_tda_pca_5.50.5_nn1.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold), 0.1, -0.01, 0.01
)
print(bct_tda_pca_5.50.5_nn1.n2_3_fold)
## $left
## [1] 0.004021497
## 
## $rope
## [1] 0.001685006
## 
## $right
## [1] 0.9942935
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nn1_n2_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold))
#bf_tda_pca_5.50.5_nn1.n2_3_fold

# One-sample t-test of the fold-wise differences against a mean of 0
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold)
## t = 11.757, df = 2, p-value = 0.007157
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.07161399 0.15428828
## sample estimates:
## mean of x 
## 0.1129511
### Test set diff
# Test-accuracy gap: `nn1_cf_ov_acc` minus the TDA-assisted test accuracy.
# NOTE(review): this is a single value (see the output below), so the tests in
# this section run on one observation; the correlated test returns NA for it.
diff_tda_pca_5.50.5_nn1.n2_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.50.5_n2_nn1_cf0_ov_acc
print(diff_tda_pca_5.50.5_nn1.n2_test)
##  Accuracy 
## 0.4182023
## Bayesian Tests Test set diff

# Bayesian Sign Test on the test-set difference, bounds -0.01 and 0.01

bst_tda_pca_5.50.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n2_test), -0.01, 0.01
)
print(bst_tda_pca_5.50.5_nn1.n2_test)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability

bst_tda_pca_5.50.5_nn1.n2_test_odds.left <- with(
  bst_tda_pca_5.50.5_nn1.n2_test,
  probLeft / probRight
)
print(bst_tda_pca_5.50.5_nn1.n2_test_odds.left)
## [1] 0
# Bayesian Signed Rank Test on the same difference and bounds

bsr_tda_pca_5.50.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1.n2_test), -0.01, 0.01
)
print(bsr_tda_pca_5.50.5_nn1.n2_test)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1632
## 
## $winRight
## [1] 0.8368
# Bayesian Correlated Test (all results are NA for this single-value input)

bct_tda_pca_5.50.5_nn1.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n2_test), 0.1, -0.01, 0.01
)
print(bct_tda_pca_5.50.5_nn1.n2_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nn1.n2_test))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n2_test))
#bf_tda_pca_5.50.5_nn1.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n2_test))


##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node3

#Neural Network 1
# Fit the Node3 neural network via caret's `train()`: method "nnet", tuned over
# `nn1Grid` under the resampling scheme in `fitControl`, selected by accuracy.
# `TRUE` replaces the reassignable shorthand `T`.
# NOTE(review): `Importance` looks like a leftover from a randomForest-style
# call; it is only forwarded through `...` here -- confirm whether it has any
# effect for method "nnet" or can be dropped.
Adult_TDA_PC_5.50.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 4879.915615 
## iter  10 value 4743.347164
## iter  20 value 4588.292117
## iter  30 value 4439.052935
## iter  40 value 4390.360078
## iter  50 value 4360.966943
## iter  60 value 4302.590832
## iter  70 value 4289.248075
## iter  80 value 4276.601422
## iter  90 value 4243.584358
## iter 100 value 4178.327807
## final  value 4178.327807 
## stopped after 100 iterations
## # weights:  331
## initial  value 5717.671774 
## iter  10 value 4590.052283
## iter  20 value 4558.250512
## iter  30 value 4557.583672
## iter  40 value 4440.789131
## iter  50 value 4395.767751
## iter  60 value 4314.179807
## iter  70 value 4300.861221
## iter  80 value 4298.080517
## iter  90 value 4295.598851
## iter 100 value 4256.146925
## final  value 4256.146925 
## stopped after 100 iterations
## # weights:  551
## initial  value 6908.583838 
## iter  10 value 4623.386189
## iter  20 value 4552.732983
## iter  30 value 4548.631516
## iter  40 value 4534.837916
## iter  50 value 4525.985493
## iter  60 value 4304.967451
## iter  70 value 4131.243820
## iter  80 value 4047.295098
## iter  90 value 4037.157618
## iter 100 value 4034.897931
## final  value 4034.897931 
## stopped after 100 iterations
## # weights:  771
## initial  value 5539.328058 
## iter  10 value 4559.327449
## iter  20 value 4461.207877
## iter  30 value 4455.104145
## iter  40 value 4329.993603
## iter  50 value 4286.033367
## iter  60 value 4221.404478
## iter  70 value 4099.533756
## iter  80 value 4028.892150
## iter  90 value 4008.616142
## iter 100 value 3952.765178
## final  value 3952.765178 
## stopped after 100 iterations
## # weights:  221
## initial  value 6434.195777 
## iter  10 value 4746.054348
## iter  20 value 4691.708094
## iter  30 value 4446.109012
## iter  40 value 4412.413185
## iter  50 value 4329.976016
## iter  60 value 4289.483677
## iter  70 value 4250.808631
## iter  80 value 4239.974246
## iter  90 value 4142.767420
## iter 100 value 4031.674428
## final  value 4031.674428 
## stopped after 100 iterations
## # weights:  331
## initial  value 6491.120192 
## iter  10 value 4478.544448
## iter  20 value 4448.093289
## iter  30 value 4445.296967
## iter  40 value 4419.316281
## iter  50 value 4369.235239
## iter  60 value 4309.403417
## iter  70 value 4031.950589
## iter  80 value 3754.356687
## iter  90 value 3594.700647
## iter 100 value 3538.361437
## final  value 3538.361437 
## stopped after 100 iterations
## # weights:  551
## initial  value 7331.608628 
## iter  10 value 4572.231963
## iter  20 value 4464.311572
## iter  30 value 4457.357831
## iter  40 value 4441.822628
## iter  50 value 4376.180568
## iter  60 value 4258.583702
## iter  70 value 4079.934709
## iter  80 value 3939.805425
## iter  90 value 3924.361920
## iter 100 value 3836.908717
## final  value 3836.908717 
## stopped after 100 iterations
## # weights:  771
## initial  value 8183.196746 
## iter  10 value 4759.170167
## iter  20 value 4743.734679
## iter  30 value 4575.282007
## iter  40 value 4437.663928
## iter  50 value 4346.923774
## iter  60 value 4336.171450
## iter  70 value 4314.683624
## iter  80 value 4296.495611
## iter  90 value 4271.573084
## iter 100 value 4178.279287
## final  value 4178.279287 
## stopped after 100 iterations
## # weights:  221
## initial  value 5008.681822 
## iter  10 value 4741.239082
## iter  20 value 4539.380981
## iter  30 value 4492.109512
## iter  40 value 4463.855871
## iter  50 value 4400.378848
## iter  60 value 4162.434503
## iter  70 value 3909.766774
## iter  80 value 3895.525762
## iter  90 value 3821.167083
## iter 100 value 3755.331587
## final  value 3755.331587 
## stopped after 100 iterations
## # weights:  331
## initial  value 4792.655014 
## iter  10 value 4744.797266
## iter  20 value 4743.583177
## iter  30 value 4702.715292
## iter  40 value 4674.294803
## iter  50 value 4449.610920
## iter  60 value 4446.122763
## iter  70 value 4432.072984
## iter  80 value 4429.208847
## iter  90 value 4418.239432
## iter 100 value 4408.310485
## final  value 4408.310485 
## stopped after 100 iterations
## # weights:  551
## initial  value 7736.057375 
## iter  10 value 4728.831203
## iter  20 value 4455.716820
## iter  30 value 4455.245690
## iter  40 value 4450.498672
## iter  50 value 4449.124462
## iter  60 value 4434.287770
## iter  70 value 4410.628138
## iter  80 value 4367.882895
## iter  90 value 4339.891879
## iter 100 value 4326.651892
## final  value 4326.651892 
## stopped after 100 iterations
## # weights:  771
## initial  value 6634.381569 
## iter  10 value 4645.650203
## iter  20 value 4461.200724
## iter  30 value 4456.385584
## iter  40 value 4453.756836
## iter  50 value 4451.989923
## iter  60 value 4448.961069
## iter  70 value 4439.774996
## iter  80 value 4391.684036
## iter  90 value 4329.771330
## iter 100 value 4303.631277
## final  value 4303.631277 
## stopped after 100 iterations
## # weights:  221
## initial  value 7436.966133 
## iter  10 value 4744.831272
## iter  20 value 4689.467117
## iter  30 value 4474.327974
## iter  40 value 4451.982367
## iter  50 value 4390.194016
## iter  60 value 4281.047730
## iter  70 value 4252.474211
## iter  80 value 4227.933641
## iter  90 value 4180.093085
## iter 100 value 3960.693394
## final  value 3960.693394 
## stopped after 100 iterations
## # weights:  331
## initial  value 7031.593739 
## iter  10 value 4744.285853
## iter  10 value 4744.285852
## iter  10 value 4744.285819
## final  value 4744.285819 
## converged
## # weights:  551
## initial  value 6049.686714 
## iter  10 value 4681.663691
## iter  20 value 4469.216910
## iter  30 value 4458.424468
## iter  40 value 4448.614478
## iter  50 value 4419.952357
## iter  60 value 4372.360183
## iter  70 value 4287.411331
## iter  80 value 4271.079840
## iter  90 value 4220.646255
## iter 100 value 4173.478094
## final  value 4173.478094 
## stopped after 100 iterations
## # weights:  771
## initial  value 4841.193604 
## iter  10 value 4728.354369
## iter  20 value 4710.279648
## iter  30 value 4548.352445
## iter  40 value 4519.495354
## iter  50 value 4481.979282
## iter  60 value 4458.186740
## iter  70 value 4449.000353
## iter  80 value 4447.704714
## iter  90 value 4445.253038
## iter 100 value 4437.621400
## final  value 4437.621400 
## stopped after 100 iterations
## # weights:  221
## initial  value 6137.545859 
## iter  10 value 4567.823244
## iter  20 value 4479.361485
## iter  30 value 4423.109661
## iter  40 value 4356.048283
## iter  50 value 4335.289193
## iter  60 value 4306.818330
## iter  70 value 4168.494715
## iter  80 value 3889.109872
## iter  90 value 3766.631692
## iter 100 value 3700.522889
## final  value 3700.522889 
## stopped after 100 iterations
## # weights:  331
## initial  value 4831.734014 
## iter  10 value 4723.220007
## iter  20 value 4488.363088
## iter  30 value 4469.012550
## iter  40 value 4464.506157
## iter  50 value 4417.764084
## iter  60 value 4345.181105
## iter  70 value 4312.060368
## iter  80 value 4277.271628
## iter  90 value 4068.703403
## iter 100 value 3837.796114
## final  value 3837.796114 
## stopped after 100 iterations
## # weights:  551
## initial  value 5755.839451 
## iter  10 value 4753.967443
## iter  20 value 4744.476163
## iter  30 value 4744.352344
## iter  40 value 4645.335907
## iter  50 value 4498.963957
## iter  60 value 4473.437108
## iter  70 value 4463.980294
## iter  80 value 4463.474165
## iter  90 value 4461.894376
## iter 100 value 4450.810662
## final  value 4450.810662 
## stopped after 100 iterations
## # weights:  771
## initial  value 6373.849548 
## iter  10 value 4544.364124
## iter  20 value 4471.579917
## iter  30 value 4458.884095
## iter  40 value 4328.601499
## iter  50 value 4130.865530
## iter  60 value 3909.542496
## iter  70 value 3899.689601
## iter  80 value 3843.685588
## iter  90 value 3805.279310
## iter 100 value 3700.999205
## final  value 3700.999205 
## stopped after 100 iterations
## # weights:  221
## initial  value 8068.849973 
## iter  10 value 4748.987219
## iter  20 value 4745.530792
## iter  30 value 4494.912395
## iter  40 value 4472.013977
## iter  50 value 4469.809937
## iter  60 value 4468.789707
## iter  70 value 4468.395799
## iter  80 value 4444.423215
## iter  90 value 4357.657434
## iter 100 value 4311.473092
## final  value 4311.473092 
## stopped after 100 iterations
## # weights:  331
## initial  value 5450.277968 
## iter  10 value 4745.036759
## iter  20 value 4497.857511
## iter  30 value 4374.536919
## iter  40 value 4307.618419
## iter  50 value 4294.810206
## iter  60 value 4290.389422
## iter  70 value 4246.026689
## iter  80 value 4113.148611
## iter  90 value 4004.690343
## iter 100 value 3841.911512
## final  value 3841.911512 
## stopped after 100 iterations
## # weights:  551
## initial  value 11939.710793 
## iter  10 value 4680.636221
## iter  20 value 4503.417759
## iter  30 value 4484.635539
## iter  40 value 4465.318136
## iter  50 value 4457.616017
## iter  60 value 4447.658017
## iter  70 value 4428.807542
## iter  80 value 4426.728793
## iter  90 value 4413.652531
## iter 100 value 4385.226803
## final  value 4385.226803 
## stopped after 100 iterations
## # weights:  771
## initial  value 4989.161230 
## iter  10 value 4727.822330
## iter  20 value 4539.994177
## iter  30 value 4482.795088
## iter  40 value 4453.834663
## iter  50 value 4442.613906
## iter  60 value 4377.639225
## iter  70 value 4323.971250
## iter  80 value 4292.383568
## iter  90 value 4262.514885
## iter 100 value 4238.450647
## final  value 4238.450647 
## stopped after 100 iterations
## # weights:  221
## initial  value 7331.816310 
## iter  10 value 4745.613146
## final  value 4745.612921 
## converged
## # weights:  331
## initial  value 5665.840522 
## iter  10 value 4732.439749
## iter  20 value 4512.019409
## iter  30 value 4509.099750
## iter  40 value 4489.990579
## iter  50 value 4476.166595
## iter  60 value 4456.938080
## iter  70 value 4417.618511
## iter  80 value 4363.154306
## iter  90 value 4332.725834
## iter 100 value 4317.083782
## final  value 4317.083782 
## stopped after 100 iterations
## # weights:  551
## initial  value 10467.928860 
## iter  10 value 4599.168140
## iter  20 value 4475.051095
## iter  30 value 4411.696255
## iter  40 value 4343.278509
## iter  50 value 4321.189767
## iter  60 value 4253.504321
## iter  70 value 4071.288139
## iter  80 value 3947.796311
## iter  90 value 3847.784742
## iter 100 value 3827.730223
## final  value 3827.730223 
## stopped after 100 iterations
## # weights:  771
## initial  value 5257.258948 
## iter  10 value 4611.017021
## iter  20 value 4480.430603
## iter  30 value 4474.793860
## iter  40 value 4474.401904
## iter  50 value 4470.872492
## iter  60 value 4437.812658
## iter  70 value 4427.773266
## iter  80 value 4352.751790
## iter  90 value 4309.415379
## iter 100 value 4295.831276
## final  value 4295.831276 
## stopped after 100 iterations
## # weights:  221
## initial  value 7411.124742 
## iter  10 value 4746.017917
## iter  20 value 4736.058560
## iter  30 value 4560.094087
## iter  40 value 4558.597382
## iter  50 value 4536.435687
## iter  60 value 4474.196717
## iter  70 value 4374.279824
## iter  80 value 4113.403575
## iter  90 value 3887.797574
## iter 100 value 3775.064598
## final  value 3775.064598 
## stopped after 100 iterations
## # weights:  331
## initial  value 7745.337927 
## iter  10 value 4655.189861
## iter  20 value 4596.415782
## iter  30 value 4585.962774
## iter  40 value 4477.632489
## iter  50 value 4473.349437
## iter  60 value 4472.668920
## iter  70 value 4454.277655
## iter  80 value 4347.262236
## iter  90 value 4336.343713
## iter 100 value 4329.886671
## final  value 4329.886671 
## stopped after 100 iterations
## # weights:  551
## initial  value 10712.356114 
## iter  10 value 4720.113559
## iter  20 value 4485.480143
## iter  30 value 4445.977773
## iter  40 value 4356.359805
## iter  50 value 4348.665498
## iter  60 value 4326.496732
## iter  70 value 4316.908592
## iter  80 value 4294.388905
## iter  90 value 4282.552246
## iter 100 value 4184.130452
## final  value 4184.130452 
## stopped after 100 iterations
## # weights:  771
## initial  value 5919.586744 
## iter  10 value 4733.872901
## iter  20 value 4721.875091
## iter  30 value 4720.556517
## iter  40 value 4438.767930
## iter  50 value 4351.584999
## iter  60 value 4340.370827
## iter  70 value 4305.318070
## iter  80 value 4252.155216
## iter  90 value 4090.920726
## iter 100 value 3957.698884
## final  value 3957.698884 
## stopped after 100 iterations
## # weights:  221
## initial  value 5680.128924 
## iter  10 value 4687.704731
## iter  20 value 4679.926447
## iter  30 value 4488.636328
## iter  40 value 4482.256924
## iter  50 value 4371.359927
## iter  60 value 4316.649897
## iter  70 value 4263.712220
## iter  80 value 4143.074334
## iter  90 value 3931.106330
## iter 100 value 3743.190202
## final  value 3743.190202 
## stopped after 100 iterations
## # weights:  331
## initial  value 5340.671866 
## iter  10 value 4761.479020
## iter  20 value 4760.031882
## iter  30 value 4723.472530
## iter  40 value 4484.147742
## iter  50 value 4474.479709
## iter  60 value 4462.713069
## iter  70 value 4432.763097
## iter  80 value 4386.784563
## iter  90 value 4146.629942
## iter 100 value 4026.973361
## final  value 4026.973361 
## stopped after 100 iterations
## # weights:  551
## initial  value 8655.370719 
## iter  10 value 4590.504931
## iter  20 value 4461.383781
## iter  30 value 4391.633613
## iter  40 value 4343.700563
## iter  50 value 4333.282413
## iter  60 value 4315.624975
## iter  70 value 4292.438364
## iter  80 value 4111.641646
## iter  90 value 4007.774019
## iter 100 value 3823.758104
## final  value 3823.758104 
## stopped after 100 iterations
## # weights:  771
## initial  value 7207.998132 
## iter  10 value 4732.380372
## iter  20 value 4468.188797
## iter  30 value 4441.390307
## iter  40 value 4431.013729
## iter  50 value 4402.109351
## iter  60 value 4313.238271
## iter  70 value 4280.791742
## iter  80 value 4274.593098
## iter  90 value 4258.924194
## iter 100 value 4257.464761
## final  value 4257.464761 
## stopped after 100 iterations
## # weights:  221
## initial  value 8784.072643 
## iter  10 value 7096.514418
## iter  20 value 6795.849461
## iter  30 value 6711.162734
## iter  40 value 6631.299212
## iter  50 value 6543.484733
## iter  60 value 6516.740081
## iter  70 value 6457.366924
## iter  80 value 6438.240478
## iter  90 value 6400.558606
## iter 100 value 6170.352610
## final  value 6170.352610 
## stopped after 100 iterations
# Print the n3 neural-network fit: resampling results per size/decay
# combination and the finally selected tuning values.
Adult_TDA_PC_5.50.5_n3_NN1Fit0
## Neural Network 
## 
## 13240 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8826, 8827, 8827 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.7969024  0.2236936
##   2     0.5    0.8268131  0.4269583
##   2     0.7    0.8200900  0.3710531
##   3     0.3    0.7953163  0.1636010
##   3     0.5    0.8210718  0.3751329
##   3     0.7    0.8086114  0.3342103
##   5     0.3    0.8190331  0.3438761
##   5     0.5    0.8094401  0.3009132
##   5     0.7    0.8080064  0.2821051
##   7     0.3    0.8094400  0.2810180
##   7     0.5    0.8134448  0.3158719
##   7     0.7    0.8052116  0.2263384
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.5.
# Per-fold Accuracy and Kappa of the selected n3 model.
Adult_TDA_PC_5.50.5_n3_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8282348 0.4266051    Fold3
## 2 0.8305008 0.4637494    Fold2
## 3 0.8217037 0.3905204    Fold1
# Keep only the per-fold accuracy column (still a one-column data frame)
# so it can be differenced against another model's folds later on.
ad_tda_pc_5.50.5_n3_nn1_fit_re <-
  Adult_TDA_PC_5.50.5_n3_NN1Fit0$resample["Accuracy"]

# Dump the architecture and weights of the final n3 network.
summary(Adult_TDA_PC_5.50.5_n3_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting  decay=0.5
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##    -1.29     0.00     0.52     1.50    -2.37     0.00    -0.89     0.40 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##    -0.60     0.18    -0.03     0.00     0.93     0.63     0.47    -1.16 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##    -1.34    -0.32    -0.32    -0.15     2.06    -2.01     2.00    -0.34 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -2.99     0.00     0.26     1.00     0.09    -0.21     0.01    -1.07 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##    -0.34     0.59     0.58    -0.85     0.52     3.53    -0.03    -0.21 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##    -0.05    -1.82    -0.05     0.31    -1.61     0.00    -0.80    -0.45 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     1.12    -0.21    -1.53     0.42    -0.26     0.76    -1.05    -0.50 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -0.66    -0.02     0.50     2.55    -1.14    -3.19     3.02    -4.31 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.08     0.80     0.06     0.32     0.40    -0.01 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##    -0.69     0.00     0.02     0.07     0.05     0.21     0.19    -0.86 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00    -0.21     0.00    -0.21    -0.01    -0.01     0.08    -0.08 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.03     0.13     0.05     0.12    -0.08    -1.39    -0.15     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##    -0.02     1.22     0.14     0.01    -0.41     0.34    -0.57     0.01 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.02    -0.22    -0.48    -0.17     0.00 
##  b->o h1->o h2->o 
## -0.77 -0.75  2.55
# Variable-importance plot for the n3 neural-network fit, limited to the
# 25 top-ranked predictors.
vip(Adult_TDA_PC_5.50.5_n3_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n3_NN1Fit TDA-Assisted NN")

# Score the testing data with the n3 neural-network fit.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n3_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted vs. observed class on the testing data.
# NOTE(review): in the recorded run the test accuracy falls below the
# no-information rate although the CV accuracy was much higher -- confirm the
# test set is comparable to the data this model was trained on.
ad_tda_pc_5.50.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n3_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   4968  1693
##      >50K    2448   659
##                                           
##                Accuracy : 0.5761          
##                  95% CI : (0.5662, 0.5859)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.045          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6699          
##             Specificity : 0.2802          
##          Pos Pred Value : 0.7458          
##          Neg Pred Value : 0.2121          
##              Prevalence : 0.7592          
##          Detection Rate : 0.5086          
##    Detection Prevalence : 0.6819          
##       Balanced Accuracy : 0.4750          
##                                           
##        'Positive' Class :  <=50K          
## 
# Second print of the same confusion matrix (output repeats the one above).
ad_tda_pc_5.50.5_n3_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   4968  1693
##      >50K    2448   659
##                                           
##                Accuracy : 0.5761          
##                  95% CI : (0.5662, 0.5859)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.045          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6699          
##             Specificity : 0.2802          
##          Pos Pred Value : 0.7458          
##          Neg Pred Value : 0.2121          
##              Prevalence : 0.7592          
##          Detection Rate : 0.5086          
##    Detection Prevalence : 0.6819          
##       Balanced Accuracy : 0.4750          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics as a named vector (accuracy, kappa, accuracy bounds,
# null accuracy and the two p-values).
ad_tda_pc_5.50.5_n3_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   5.760647e-01  -4.498026e-02   5.661919e-01   5.858921e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00   1.042241e-31
# Store the test-set accuracy (first entry of `overall`) for later comparison.
ad_tda_pc_5.50.5_n3_nn1_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n3_nn1_cf0$overall["Accuracy"]
# Per-class metrics of the same confusion matrix.
ad_tda_pc_5.50.5_n3_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.6699029            0.2801871            0.7458340 
##       Neg Pred Value            Precision               Recall 
##            0.2121017            0.7458340            0.6699029 
##                   F1           Prevalence       Detection Rate 
##            0.7058322            0.7592138            0.5085995 
## Detection Prevalence    Balanced Accuracy 
##            0.6819206            0.4750450
# Keep precision, recall and F1 (entries 5 to 7 of the per-class metrics).
ad_tda_pc_5.50.5_n3_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n3_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Fold-wise accuracy difference: `ad_nn1_fit_re` (presumably the non-TDA NN
# folds) minus the TDA-assisted n3 folds.
# NOTE(review): rows are paired by position, not by fold label -- confirm
# both resample tables list the folds in the same order.
diff_tda_pca_5.50.5_nn1_n3_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.50.5_n3_nn1_fit_re
diff_tda_pca_5.50.5_nn1_n3_3_fold
##        Accuracy
## 1  0.0004043546
## 2  0.0157543076
## 3 -0.0147807957
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# The fold differences are passed as a matrix together with -0.01 and 0.01
# (presumably the lower/upper ROPE limits -- confirm against the helper's
# definition). The result is a list with probLeft, probRope and probRight.

bst_tda_pca_5.50.5_nn1.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n3_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.25
# Odds of "left" versus "right" from the 3-fold sign-test probabilities.
bst_tda_pca_5.50.5_nn1.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_nn1.n3_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_nn1.n3_3_fold_odds.left
## [1] 1
# Bayesian Signed Rank Test
# Uses the same difference matrix and -0.01 / 0.01 bounds as the sign test;
# the result is a list with winLeft, winRope and winRight.
# NOTE(review): the recorded values look simulation-based -- confirm a seed
# is set upstream if exact reproducibility matters.

bsr_tda_pca_5.50.5_nn1.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n3_3_fold
## $winLeft
## [1] 0.0501
## 
## $winRope
## [1] 0.9011
## 
## $winRight
## [1] 0.0488
# Bayesian Correlated Test
# Returns a list with left, rope and right for the fold differences.
# NOTE(review): 0.1 is presumably the fold-correlation argument; for k-fold CV
# the usual heuristic is 1/k (1/3 for the 3 folds used here) -- confirm
# against the helper's definition before relying on these probabilities.

bct_tda_pca_5.50.5_nn1.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n3_3_fold
## $left
## [1] 0.2060871
## 
## $rope
## [1] 0.5701477
## 
## $right
## [1] 0.2237652
# ROPE plot of the fold differences over the interval [-0.01, 0.01].
plot(rope(diff_tda_pca_5.50.5_nn1_n3_3_fold, range = c(-0.01, 0.01)))

# Bayes factor t-test (currently disabled)
#bf_tda_pca_5.50.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold))
#bf_tda_pca_5.50.5_nn1.n3_3_fold

# Classical one-sample t-test on the same fold differences.
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold)
## t = 0.052104, df = 2, p-value = 0.9632
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.03746760  0.03838617
## sample estimates:
##    mean of x 
## 0.0004592888
### Test set diff
# Single test-set accuracy gap: `nn1_cf_ov_acc` minus the TDA-assisted n3
# test accuracy.
diff_tda_pca_5.50.5_nn1.n3_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.50.5_n3_nn1_cf0_ov_acc
diff_tda_pca_5.50.5_nn1.n3_test
##  Accuracy 
## 0.2277846
## Bayesian Tests Test set diff

# Bayesian Sign Test
# NOTE(review): the input here is a single test-set difference, so the
# resulting probabilities rest on one observation only.

bst_tda_pca_5.50.5_nn1.n3_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1.n3_test),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the test-set sign-test probabilities.
bst_tda_pca_5.50.5_nn1.n3_test_odds.left <- with(
  bst_tda_pca_5.50.5_nn1.n3_test,
  probLeft / probRight
)
bst_tda_pca_5.50.5_nn1.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Applied to the single test-set difference with the same -0.01 / 0.01 bounds;
# the result is a list with winLeft, winRope and winRight.

bsr_tda_pca_5.50.5_nn1.n3_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1.n3_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1562
## 
## $winRight
## [1] 0.8438
# Bayesian Correlated Test
# NOTE(review): the input holds a single difference and the recorded result is
# NA for left, rope and right, so this call carries no information as written.

bct_tda_pca_5.50.5_nn1.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1.n3_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nn1.n3_test))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n3_test))
#bf_tda_pca_5.50.5_nn1.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n3_test))


##Node4

#Neural Network 1
# Tune an `nnet` classifier on the node-4 data with `train()`: the outcome
# `adult_df1` is coerced to a factor and modelled on all remaining columns.
# `fitControl` (resampling setup) and `nn1Grid` (size/decay grid) are defined
# outside this section; the best grid point is chosen by Accuracy.
# NOTE(review): `Importance` is not a documented argument of `train()` or
# `nnet()`; it is presumably forwarded through `...` and ignored -- confirm.
Adult_TDA_PC_5.50.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 6738.487163 
## iter  10 value 2374.533129
## iter  20 value 2374.292669
## iter  30 value 2362.497825
## iter  40 value 2207.317976
## iter  50 value 2111.772312
## iter  60 value 1830.003863
## iter  70 value 1745.573034
## iter  80 value 1726.343405
## iter  90 value 1711.031654
## iter 100 value 1698.981046
## final  value 1698.981046 
## stopped after 100 iterations
## # weights:  331
## initial  value 8011.925037 
## iter  10 value 2425.677034
## iter  20 value 2374.835024
## iter  30 value 2362.283702
## iter  40 value 2192.457711
## iter  50 value 2192.345594
## iter  60 value 2170.945545
## iter  70 value 2170.604943
## iter  80 value 2167.434702
## iter  90 value 2163.938948
## iter 100 value 2162.883474
## final  value 2162.883474 
## stopped after 100 iterations
## # weights:  551
## initial  value 2942.109143 
## iter  10 value 2374.178053
## iter  20 value 2373.737856
## iter  30 value 2361.645292
## iter  40 value 2189.053824
## iter  50 value 2180.884737
## iter  60 value 2176.638745
## iter  70 value 2168.115358
## iter  80 value 2167.842060
## iter  90 value 2161.836356
## iter 100 value 2161.225501
## final  value 2161.225501 
## stopped after 100 iterations
## # weights:  771
## initial  value 13472.895242 
## iter  10 value 2376.817406
## iter  20 value 2308.538686
## iter  30 value 2208.909928
## iter  40 value 2140.301080
## iter  50 value 2050.709301
## iter  60 value 1903.206786
## iter  70 value 1799.470462
## iter  80 value 1765.464863
## iter  90 value 1762.239936
## iter 100 value 1760.628122
## final  value 1760.628122 
## stopped after 100 iterations
## # weights:  221
## initial  value 8191.353439 
## iter  10 value 2227.073953
## iter  20 value 2117.343356
## iter  30 value 2090.365015
## iter  40 value 2055.522220
## iter  50 value 2019.286060
## iter  60 value 1900.210167
## iter  70 value 1807.355055
## iter  80 value 1766.385683
## iter  90 value 1761.642848
## iter 100 value 1760.165800
## final  value 1760.165800 
## stopped after 100 iterations
## # weights:  331
## initial  value 10343.691776 
## iter  10 value 2361.644320
## iter  20 value 2224.276041
## iter  30 value 2216.948512
## iter  40 value 2176.139622
## iter  50 value 2173.930374
## iter  60 value 2173.471800
## iter  70 value 2169.302616
## iter  80 value 2167.310281
## iter  90 value 2165.999789
## iter 100 value 2165.854397
## final  value 2165.854397 
## stopped after 100 iterations
## # weights:  551
## initial  value 10260.882822 
## iter  10 value 2160.800725
## iter  20 value 1755.586999
## iter  30 value 1724.609303
## iter  40 value 1715.382323
## iter  50 value 1712.474379
## iter  60 value 1685.868905
## iter  70 value 1677.087277
## iter  80 value 1675.197462
## iter  90 value 1671.592911
## iter 100 value 1667.695520
## final  value 1667.695520 
## stopped after 100 iterations
## # weights:  771
## initial  value 7930.190466 
## iter  10 value 2333.356739
## iter  20 value 2182.373379
## iter  30 value 2171.133683
## iter  40 value 2168.388278
## iter  50 value 2150.304770
## iter  60 value 2134.446934
## iter  70 value 2085.414051
## iter  80 value 2080.652053
## iter  90 value 2066.546534
## iter 100 value 2056.642515
## final  value 2056.642515 
## stopped after 100 iterations
## # weights:  221
## initial  value 9182.616591 
## iter  10 value 2378.393845
## iter  20 value 2172.136281
## iter  30 value 2138.617650
## iter  40 value 2099.234439
## iter  50 value 2082.480958
## iter  60 value 2074.195096
## iter  70 value 2062.184417
## iter  80 value 2049.995213
## iter  90 value 2042.512112
## iter 100 value 2041.725829
## final  value 2041.725829 
## stopped after 100 iterations
## # weights:  331
## initial  value 11730.626548 
## iter  10 value 2381.387869
## iter  20 value 2346.534521
## iter  30 value 2299.140390
## iter  40 value 2161.321798
## iter  50 value 2068.821920
## iter  60 value 2039.418910
## iter  70 value 1961.519084
## iter  80 value 1931.128656
## iter  90 value 1894.400233
## iter 100 value 1826.461889
## final  value 1826.461889 
## stopped after 100 iterations
## # weights:  551
## initial  value 12558.503941 
## iter  10 value 2907.245167
## iter  20 value 2206.268137
## iter  30 value 2184.655170
## iter  40 value 2048.686307
## iter  50 value 1808.871462
## iter  60 value 1757.698683
## iter  70 value 1722.727646
## iter  80 value 1690.013854
## iter  90 value 1669.029730
## iter 100 value 1660.881366
## final  value 1660.881366 
## stopped after 100 iterations
## # weights:  771
## initial  value 4337.705135 
## iter  10 value 2406.358044
## iter  20 value 2314.681386
## iter  30 value 2240.475579
## iter  40 value 2190.706965
## iter  50 value 2156.777411
## iter  60 value 2153.787340
## iter  70 value 2151.175818
## iter  80 value 2147.510879
## iter  90 value 2146.825099
## final  value 2146.786781 
## converged
## # weights:  221
## initial  value 12276.631257 
## iter  10 value 2392.900102
## iter  20 value 2121.590158
## iter  30 value 1856.046854
## iter  40 value 1787.541948
## iter  50 value 1738.021230
## iter  60 value 1717.060730
## iter  70 value 1701.277993
## iter  80 value 1695.022920
## iter  90 value 1688.591486
## iter 100 value 1688.297192
## final  value 1688.297192 
## stopped after 100 iterations
## # weights:  331
## initial  value 5939.917236 
## iter  10 value 2376.493697
## iter  20 value 2243.766809
## iter  30 value 2149.403369
## iter  40 value 2102.066549
## iter  50 value 2092.980023
## iter  60 value 2079.641890
## iter  70 value 2019.992714
## iter  80 value 1996.567180
## iter  90 value 1839.747107
## iter 100 value 1698.568688
## final  value 1698.568688 
## stopped after 100 iterations
## # weights:  551
## initial  value 11958.743601 
## iter  10 value 2507.418861
## iter  20 value 2291.993389
## iter  30 value 2059.185754
## iter  40 value 1973.001425
## iter  50 value 1805.466056
## iter  60 value 1728.006545
## iter  70 value 1723.705590
## iter  80 value 1674.785841
## iter  90 value 1661.808090
## iter 100 value 1657.136902
## final  value 1657.136902 
## stopped after 100 iterations
## # weights:  771
## initial  value 4838.008939 
## iter  10 value 2279.069710
## iter  20 value 2108.880432
## iter  30 value 2056.115417
## iter  40 value 2053.961660
## iter  50 value 2046.357970
## iter  60 value 2031.464106
## iter  70 value 1967.196716
## iter  80 value 1922.851556
## iter  90 value 1902.022519
## iter 100 value 1826.472241
## final  value 1826.472241 
## stopped after 100 iterations
## # weights:  221
## initial  value 7992.151113 
## iter  10 value 2353.237620
## iter  20 value 2348.140621
## iter  30 value 2348.058850
## iter  40 value 2156.803174
## iter  50 value 2153.426031
## iter  60 value 2153.360807
## iter  70 value 2152.998465
## iter  80 value 2150.910018
## iter  90 value 2126.461344
## iter 100 value 2112.012671
## final  value 2112.012671 
## stopped after 100 iterations
## # weights:  331
## initial  value 8869.793580 
## iter  10 value 2237.773111
## iter  20 value 2190.033065
## iter  30 value 2189.674336
## iter  40 value 2170.279677
## iter  50 value 2169.368497
## iter  60 value 2168.702205
## iter  70 value 2161.482484
## iter  80 value 2091.221658
## iter  90 value 1989.536404
## iter 100 value 1911.175278
## final  value 1911.175278 
## stopped after 100 iterations
## # weights:  551
## initial  value 5825.468117 
## iter  10 value 2258.867594
## iter  20 value 2179.930119
## iter  30 value 2165.219295
## iter  40 value 2160.543659
## iter  50 value 2151.780699
## iter  60 value 2142.032684
## iter  70 value 2124.867017
## iter  80 value 2122.398257
## iter  90 value 2060.497685
## iter 100 value 1990.404053
## final  value 1990.404053 
## stopped after 100 iterations
## # weights:  771
## initial  value 5127.189956 
## iter  10 value 2366.712027
## iter  20 value 2215.023526
## iter  30 value 2134.784981
## iter  40 value 2099.275743
## iter  50 value 2079.053045
## iter  60 value 2059.680987
## iter  70 value 2055.748604
## iter  80 value 2051.597445
## iter  90 value 2047.046401
## iter 100 value 2036.947990
## final  value 2036.947990 
## stopped after 100 iterations
## # weights:  221
## initial  value 8647.308450 
## iter  10 value 2381.509739
## iter  20 value 2190.187893
## iter  30 value 2162.938974
## iter  40 value 2096.036619
## iter  50 value 2077.219433
## iter  60 value 2072.528938
## iter  70 value 2044.762863
## iter  80 value 2028.994327
## iter  90 value 2010.701103
## iter 100 value 1958.831118
## final  value 1958.831118 
## stopped after 100 iterations
## # weights:  331
## initial  value 7021.401500 
## iter  10 value 2380.297391
## iter  20 value 2378.815358
## iter  30 value 2378.798285
## iter  40 value 2377.355176
## iter  50 value 2282.357048
## iter  60 value 2160.217272
## iter  70 value 2159.088910
## iter  80 value 2147.956988
## iter  90 value 2145.271867
## iter 100 value 2141.011054
## final  value 2141.011054 
## stopped after 100 iterations
## # weights:  551
## initial  value 6009.537043 
## iter  10 value 2380.232231
## iter  20 value 2154.965651
## iter  30 value 2091.348607
## iter  40 value 2089.938056
## iter  50 value 2089.577205
## iter  60 value 2068.335716
## iter  70 value 2035.177879
## iter  80 value 1885.472038
## iter  90 value 1779.177357
## iter 100 value 1739.264210
## final  value 1739.264210 
## stopped after 100 iterations
## # weights:  771
## initial  value 4420.451110 
## iter  10 value 2379.495624
## iter  20 value 2377.587726
## iter  30 value 2377.553924
## iter  40 value 2181.655375
## iter  50 value 2163.447447
## iter  60 value 2163.241251
## iter  70 value 2162.845528
## iter  80 value 2161.510684
## final  value 2160.730993 
## converged
## # weights:  221
## initial  value 5370.549674 
## iter  10 value 2374.405073
## iter  20 value 2374.291261
## iter  30 value 2369.348823
## iter  40 value 2368.900626
## iter  50 value 2115.523039
## iter  60 value 2092.214751
## iter  70 value 2091.527568
## final  value 2091.509864 
## converged
## # weights:  331
## initial  value 4044.925745 
## iter  10 value 2374.546648
## iter  20 value 2373.893540
## iter  30 value 2373.885645
## iter  40 value 2363.103969
## iter  50 value 2362.780949
## iter  60 value 2362.664889
## iter  70 value 2361.970499
## iter  80 value 2360.684571
## iter  90 value 2342.260617
## iter 100 value 2239.066137
## final  value 2239.066137 
## stopped after 100 iterations
## # weights:  551
## initial  value 19236.013921 
## iter  10 value 2405.694491
## iter  20 value 2371.190863
## iter  30 value 2370.893510
## iter  40 value 2369.699125
## iter  50 value 2363.989737
## iter  60 value 2239.107437
## iter  70 value 2176.710443
## iter  80 value 2154.764151
## iter  90 value 2127.929718
## iter 100 value 2106.623475
## final  value 2106.623475 
## stopped after 100 iterations
## # weights:  771
## initial  value 10444.926587 
## iter  10 value 2610.619023
## iter  20 value 2406.488112
## iter  30 value 2306.394509
## iter  40 value 2194.077134
## iter  50 value 2133.276296
## iter  60 value 2100.885811
## iter  70 value 2086.672293
## iter  80 value 2071.223737
## iter  90 value 2047.750887
## iter 100 value 1963.747370
## final  value 1963.747370 
## stopped after 100 iterations
## # weights:  221
## initial  value 13119.713477 
## iter  10 value 2224.565767
## iter  20 value 1998.240862
## iter  30 value 1866.093889
## iter  40 value 1818.173508
## iter  50 value 1760.605189
## iter  60 value 1738.423692
## iter  70 value 1724.398512
## iter  80 value 1723.087300
## iter  90 value 1721.493916
## iter 100 value 1721.421277
## final  value 1721.421277 
## stopped after 100 iterations
## # weights:  331
## initial  value 7992.706894 
## iter  10 value 2360.019469
## iter  20 value 2349.621187
## iter  30 value 2269.221106
## iter  40 value 2137.344640
## iter  50 value 2130.920189
## iter  60 value 2117.647840
## iter  70 value 2087.449011
## iter  80 value 2020.705575
## iter  90 value 1818.412589
## iter 100 value 1771.176372
## final  value 1771.176372 
## stopped after 100 iterations
## # weights:  551
## initial  value 7125.057980 
## iter  10 value 2396.013277
## iter  20 value 2380.535515
## iter  30 value 2370.696175
## iter  40 value 2310.372072
## iter  50 value 2206.228975
## iter  60 value 2142.194461
## iter  70 value 2105.376359
## iter  80 value 1997.502373
## iter  90 value 1953.273664
## iter 100 value 1843.063768
## final  value 1843.063768 
## stopped after 100 iterations
## # weights:  771
## initial  value 12166.355805 
## iter  10 value 2387.546584
## iter  20 value 2384.644058
## iter  30 value 2211.222644
## iter  40 value 2153.546373
## iter  50 value 2084.101200
## iter  60 value 2065.458439
## iter  70 value 1943.573970
## iter  80 value 1850.546217
## iter  90 value 1833.729406
## iter 100 value 1828.183210
## final  value 1828.183210 
## stopped after 100 iterations
## # weights:  221
## initial  value 9453.213503 
## iter  10 value 2379.452210
## iter  20 value 2334.491838
## iter  30 value 2180.480275
## iter  40 value 2173.067242
## iter  50 value 2140.523117
## iter  60 value 2113.306541
## iter  70 value 2045.471726
## iter  80 value 1959.855737
## iter  90 value 1842.466994
## iter 100 value 1773.947810
## final  value 1773.947810 
## stopped after 100 iterations
## # weights:  331
## initial  value 5237.533364 
## iter  10 value 2380.032987
## iter  20 value 2378.125842
## iter  30 value 2193.991466
## iter  40 value 2183.969294
## iter  50 value 2161.233899
## iter  60 value 2124.089860
## iter  70 value 2099.525225
## iter  80 value 2081.414112
## iter  90 value 2032.971513
## iter 100 value 1986.856475
## final  value 1986.856475 
## stopped after 100 iterations
## # weights:  551
## initial  value 13684.506351 
## iter  10 value 2382.787310
## iter  20 value 2378.273282
## iter  30 value 2362.902621
## iter  40 value 2249.679942
## iter  50 value 2180.465502
## iter  60 value 2145.983337
## iter  70 value 2130.022763
## iter  80 value 2125.014374
## iter  90 value 2118.525811
## iter 100 value 2107.609682
## final  value 2107.609682 
## stopped after 100 iterations
## # weights:  771
## initial  value 4137.908039 
## iter  10 value 2376.015477
## iter  20 value 2246.337502
## iter  30 value 2128.222490
## iter  40 value 2111.597124
## iter  50 value 2099.149021
## iter  60 value 2089.268973
## iter  70 value 2075.537254
## iter  80 value 2073.994178
## iter  90 value 2069.086719
## iter 100 value 2053.667398
## final  value 2053.667398 
## stopped after 100 iterations
## # weights:  771
## initial  value 20166.737908 
## iter  10 value 3567.074234
## iter  20 value 3561.796271
## iter  30 value 3482.815181
## iter  40 value 3247.982898
## iter  50 value 3235.572524
## iter  60 value 3229.839157
## iter  70 value 3179.758606
## iter  80 value 3122.692279
## iter  90 value 3107.187417
## iter 100 value 3098.274268
## final  value 3098.274268 
## stopped after 100 iterations
# Print the n4 neural-network fit: resampling results per size/decay
# combination and the finally selected tuning values.
Adult_TDA_PC_5.50.5_n4_NN1Fit0
## Neural Network 
## 
## 16700 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11133, 11134, 11133 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa     
##   2     0.3    0.9514970  0.27512234
##   2     0.5    0.9507186  0.27516556
##   2     0.7    0.9508981  0.24936513
##   3     0.3    0.9485033  0.12560280
##   3     0.5    0.9472454  0.09368955
##   3     0.7    0.9497604  0.21684410
##   5     0.3    0.9507784  0.26568168
##   5     0.5    0.9517963  0.27278170
##   5     0.7    0.9522155  0.27915221
##   7     0.3    0.9495809  0.27591656
##   7     0.5    0.9531736  0.28182707
##   7     0.7    0.9488023  0.27553687
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.5.
Adult_TDA_PC_5.50.5_n4_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.9522098 0.2598833    Fold2
## 2 0.9541944 0.3027466    Fold1
## 3 0.9531166 0.2828513    Fold3
# Keep only the per-fold Accuracy column of the CV resamples (still a one-column data frame)
ad_tda_pc_5.50.5_n4_nn1_fit_re <- Adult_TDA_PC_5.50.5_n4_NN1Fit0$resample["Accuracy"]

# Print the weights of the final fitted network
summary(Adult_TDA_PC_5.50.5_n4_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting  decay=0.5
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.01     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.01     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.02     0.22     0.00     0.00     0.00     0.00     0.01     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.01     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00    -0.01     0.00     0.03 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.17    -0.04     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.07     0.00     0.00     0.00    -0.05     0.00     0.01 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.03     0.00     0.01     0.00     0.00     0.00     0.01     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.01     0.00     0.02     0.00     0.04    -0.04 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.01     0.00     0.01     0.00     0.03 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##    -0.01     0.00     0.15     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.02     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##    -0.97    -0.08    -0.31     0.22     0.17     0.00    -0.75     0.02 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##    -0.38     0.11    -0.03     0.00    -0.06    -0.36    -0.52    -0.14 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.09     0.00    -0.19     0.04    -0.55     0.61     0.00    -0.58 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.45    -0.19     0.00     0.42    -0.50     0.10     0.00     0.15 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##    -0.02    -0.57    -0.72     0.10    -0.31    -0.68     0.00    -0.26 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##     0.37    -0.03    -0.39    -0.15     0.37     0.04     0.21     0.00 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##    -0.68     0.83    -0.28    -0.80     0.15     0.01     0.08    -0.96 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##     0.56     0.09    -0.04    -0.25     0.02    -0.80     0.22    -1.19 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.00     0.00    -0.09     0.12    -0.19     0.00     0.00     0.00 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##     0.00     0.00     0.00     0.00    -0.01     0.15     0.02     0.00 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##    -0.14     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     0.03     0.25     0.00     0.15     0.00    -0.31     0.00     0.00 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.00    -0.05     0.00    -0.53     0.20     0.00     0.00     0.00 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.00     0.00    -0.67     0.00     0.00 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.00     0.00     0.00    -0.10     0.00     0.00     0.00     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##    -0.03     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h6   i1->h6   i2->h6   i3->h6   i4->h6   i5->h6   i6->h6   i7->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h6   i9->h6  i10->h6  i11->h6  i12->h6  i13->h6  i14->h6  i15->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h6  i17->h6  i18->h6  i19->h6  i20->h6  i21->h6  i22->h6  i23->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h6  i25->h6  i26->h6  i27->h6  i28->h6  i29->h6  i30->h6  i31->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h6  i33->h6  i34->h6  i35->h6  i36->h6  i37->h6  i38->h6  i39->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h6  i41->h6  i42->h6  i43->h6  i44->h6  i45->h6  i46->h6  i47->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h6  i49->h6  i50->h6  i51->h6  i52->h6  i53->h6  i54->h6  i55->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h6  i57->h6  i58->h6  i59->h6  i60->h6  i61->h6  i62->h6  i63->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h6  i65->h6  i66->h6  i67->h6  i68->h6  i69->h6  i70->h6  i71->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h6  i73->h6  i74->h6  i75->h6  i76->h6  i77->h6  i78->h6  i79->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h6  i81->h6  i82->h6  i83->h6  i84->h6  i85->h6  i86->h6  i87->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h6  i89->h6  i90->h6  i91->h6  i92->h6  i93->h6  i94->h6  i95->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h6  i97->h6  i98->h6  i99->h6 i100->h6 i101->h6 i102->h6 i103->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h7   i1->h7   i2->h7   i3->h7   i4->h7   i5->h7   i6->h7   i7->h7 
##     0.00    -0.01     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h7   i9->h7  i10->h7  i11->h7  i12->h7  i13->h7  i14->h7  i15->h7 
##     0.00     0.00     0.00     0.26     0.00     0.00     0.00     0.00 
##  i16->h7  i17->h7  i18->h7  i19->h7  i20->h7  i21->h7  i22->h7  i23->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h7  i25->h7  i26->h7  i27->h7  i28->h7  i29->h7  i30->h7  i31->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h7  i33->h7  i34->h7  i35->h7  i36->h7  i37->h7  i38->h7  i39->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h7  i41->h7  i42->h7  i43->h7  i44->h7  i45->h7  i46->h7  i47->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h7  i49->h7  i50->h7  i51->h7  i52->h7  i53->h7  i54->h7  i55->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h7  i57->h7  i58->h7  i59->h7  i60->h7  i61->h7  i62->h7  i63->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h7  i65->h7  i66->h7  i67->h7  i68->h7  i69->h7  i70->h7  i71->h7 
##     0.13     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h7  i73->h7  i74->h7  i75->h7  i76->h7  i77->h7  i78->h7  i79->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h7  i81->h7  i82->h7  i83->h7  i84->h7  i85->h7  i86->h7  i87->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h7  i89->h7  i90->h7  i91->h7  i92->h7  i93->h7  i94->h7  i95->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h7  i97->h7  i98->h7  i99->h7 i100->h7 i101->h7 i102->h7 i103->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o 
## -0.75  0.00 -1.65  0.00  3.38 -0.01  0.00 -0.68
# Variable-importance plot for the tuned network, limited to 25 features.
# The plot title previously read "TDA-Assited"; corrected to "TDA-Assisted".
vip(Adult_TDA_PC_5.50.5_n4_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n4_NN1Fit TDA-Assisted NN")

# Score the rows of adult.one_hot_df4Test with the tuned network
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n4_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted vs. observed classes (column adult_df1) and print the summary
ad_tda_pc_5.50.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7407  2028
##      >50K       9   324
##                                           
##                Accuracy : 0.7915          
##                  95% CI : (0.7833, 0.7995)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1.984e-14       
##                                           
##                   Kappa : 0.1932          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9988          
##             Specificity : 0.1378          
##          Pos Pred Value : 0.7851          
##          Neg Pred Value : 0.9730          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7583          
##    Detection Prevalence : 0.9659          
##       Balanced Accuracy : 0.5683          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n4_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7407  2028
##      >50K       9   324
##                                           
##                Accuracy : 0.7915          
##                  95% CI : (0.7833, 0.7995)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1.984e-14       
##                                           
##                   Kappa : 0.1932          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9988          
##             Specificity : 0.1378          
##          Pos Pred Value : 0.7851          
##          Neg Pred Value : 0.9730          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7583          
##    Detection Prevalence : 0.9659          
##       Balanced Accuracy : 0.5683          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n4_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.914619e-01   1.931511e-01   7.832678e-01   7.994819e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.983630e-14   0.000000e+00
# Store the overall test-set accuracy (named element "Accuracy"), then show per-class statistics
ad_tda_pc_5.50.5_n4_nn1_cf0_ov_acc <- ad_tda_pc_5.50.5_n4_nn1_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n4_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9987864            0.1377551            0.7850556 
##       Neg Pred Value            Precision               Recall 
##            0.9729730            0.7850556            0.9987864 
##                   F1           Prevalence       Detection Rate 
##            0.8791170            0.7592138            0.7582924 
## Detection Prevalence    Balanced Accuracy 
##            0.9659091            0.5682708
# Store precision, recall and F1 from the per-class statistics, selected by name
ad_tda_pc_5.50.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n4_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Per-fold accuracy difference: ad_nn1_fit_re minus the TDA-assisted NN resamples
# (a negative value means the TDA-assisted fit scored higher on that fold)
diff_tda_pca_5.50.5_nn1_n4_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.50.5_n4_nn1_fit_re
diff_tda_pca_5.50.5_nn1_n4_3_fold
##     Accuracy
## 1 -0.1235707
## 2 -0.1079393
## 3 -0.1461937
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# Sign test on the fold-wise differences; -0.01 and 0.01 are presumably the ROPE bounds
bst_tda_pca_5.50.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# Ratio of the sign test's left to right probability.
# Evaluates to Inf when probRight is 0 and probLeft is positive.
bst_tda_pca_5.50.5_nn1.n4_3_fold_odds.left <-
  bst_tda_pca_5.50.5_nn1.n4_3_fold$probLeft / bst_tda_pca_5.50.5_nn1.n4_3_fold$probRight
bst_tda_pca_5.50.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

# Signed-rank test on the fold-wise differences; -0.01 and 0.01 are presumably the ROPE bounds
bsr_tda_pca_5.50.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n4_3_fold
## $winLeft
## [1] 0.9912667
## 
## $winRope
## [1] 0.008733333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Correlated t-test on the fold-wise differences.
# NOTE(review): the second argument (0.1) is presumably the between-fold correlation rho.
# The usual heuristic is n_test / n_total, i.e. 1/3 for 3-fold CV; confirm 0.1 is intended.
bct_tda_pca_5.50.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n4_3_fold
## $left
## [1] 0.9939905
## 
## $rope
## [1] 0.001617117
## 
## $right
## [1] 0.004392391
# Rope Plot
# Plot the fold-wise differences against the [-0.01, 0.01] ROPE interval
plot(rope(diff_tda_pca_5.50.5_nn1_n4_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold))
#bf_tda_pca_5.50.5_nn1.n4_3_fold

#t_test
# One-sample t-test of the fold-wise differences against a mean of 0 (t.test defaults)
t.test(
  as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold)
## t = -11.338, df = 2, p-value = 0.00769
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.17367967 -0.07812279
## sample estimates:
##  mean of x 
## -0.1259012
### Test set diff
# Test-set accuracy difference: nn1_cf_ov_acc minus the TDA-assisted NN accuracy
diff_tda_pca_5.50.5_nn1.n4_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.50.5_n4_nn1_cf0_ov_acc
diff_tda_pca_5.50.5_nn1.n4_test
##   Accuracy 
## 0.01238739
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Sign test on the test-set difference; -0.01 and 0.01 are presumably the ROPE bounds
bst_tda_pca_5.50.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

# Ratio of the test-set sign test's left to right probability
bst_tda_pca_5.50.5_nn1.n4_test_odds.left <-
  bst_tda_pca_5.50.5_nn1.n4_test$probLeft / bst_tda_pca_5.50.5_nn1.n4_test$probRight
bst_tda_pca_5.50.5_nn1.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Signed-rank test on the test-set difference; -0.01 and 0.01 are presumably the ROPE bounds
bsr_tda_pca_5.50.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4589333
## 
## $winRight
## [1] 0.5410667
# Bayesian Correlated Test

# Correlated t-test on the test-set difference.
# NOTE(review): if the input holds only one difference there is no spread to estimate,
# and this call appears to return NA for left/rope/right; consider dropping it.
bct_tda_pca_5.50.5_nn1.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nn1.n4_test))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n4_test))
#bf_tda_pca_5.50.5_nn1.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n4_test))


##Node5

#Neural Network 1
# Tune an "nnet" model with caret on the node-5 TDA data set, predicting adult_df1
# (coerced to a factor) from all other columns. fitControl and nn1Grid are defined
# outside this chunk; candidate tuning rows are compared on Accuracy.
# `TRUE` replaces the reassignable alias `T`.
# NOTE(review): `Importance` is not a documented argument of caret::train() or
# nnet::nnet(); it looks carried over from randomForest's `importance`. Confirm
# it has any effect here.
Adult_TDA_PC_5.50.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 8114.725127 
## iter  10 value 955.349176
## iter  20 value 146.535214
## iter  30 value 143.323738
## iter  40 value 127.664320
## iter  50 value 122.090303
## iter  60 value 122.051380
## final  value 122.051077 
## converged
## # weights:  331
## initial  value 6782.900302 
## iter  10 value 803.208127
## iter  20 value 333.939333
## iter  30 value 145.915091
## iter  40 value 143.901633
## iter  50 value 142.368765
## iter  60 value 142.232359
## iter  70 value 140.371788
## iter  80 value 132.766661
## iter  90 value 125.434288
## final  value 125.432492 
## converged
## # weights:  551
## initial  value 8386.479716 
## iter  10 value 743.156498
## iter  20 value 144.207283
## iter  30 value 140.479720
## iter  40 value 139.431464
## iter  50 value 139.192910
## iter  60 value 139.190149
## final  value 139.190134 
## converged
## # weights:  771
## initial  value 4819.555805 
## iter  10 value 200.603002
## iter  20 value 146.212628
## iter  30 value 145.385861
## iter  40 value 144.678457
## iter  50 value 143.970247
## iter  60 value 143.853175
## iter  70 value 142.967678
## iter  80 value 129.755083
## iter  90 value 129.279069
## iter 100 value 127.733859
## final  value 127.733859 
## stopped after 100 iterations
## # weights:  221
## initial  value 10295.661329 
## iter  10 value 213.438469
## iter  20 value 148.268561
## iter  30 value 147.367112
## iter  40 value 127.034871
## iter  50 value 125.579759
## iter  60 value 123.756647
## iter  70 value 123.616960
## iter  80 value 123.613790
## final  value 123.613779 
## converged
## # weights:  331
## initial  value 15602.447840 
## final  value 8979.567482 
## converged
## # weights:  551
## initial  value 11106.726781 
## final  value 7495.000763 
## converged
## # weights:  771
## initial  value 2393.182194 
## iter  10 value 142.491150
## iter  20 value 140.265643
## iter  30 value 140.067256
## iter  40 value 133.844875
## iter  50 value 124.776606
## iter  60 value 120.905279
## iter  70 value 120.832997
## iter  80 value 120.826763
## iter  90 value 120.823279
## iter 100 value 120.819807
## final  value 120.819807 
## stopped after 100 iterations
## # weights:  221
## initial  value 10468.441696 
## iter  10 value 1247.377788
## iter  20 value 168.174707
## iter  30 value 146.895843
## iter  40 value 146.098308
## iter  50 value 146.093954
## iter  60 value 146.093590
## iter  60 value 146.093589
## iter  60 value 146.093589
## final  value 146.093589 
## converged
## # weights:  331
## initial  value 5508.624612 
## iter  10 value 230.074516
## iter  20 value 171.841236
## iter  30 value 136.128824
## iter  40 value 128.867653
## iter  50 value 128.791065
## iter  60 value 128.457196
## iter  70 value 127.576533
## iter  80 value 127.394796
## iter  90 value 127.386014
## final  value 127.385918 
## converged
## # weights:  551
## initial  value 5316.624827 
## iter  10 value 156.247147
## iter  20 value 141.740356
## iter  30 value 141.728673
## final  value 141.728637 
## converged
## # weights:  771
## initial  value 1762.810200 
## iter  10 value 209.300775
## iter  20 value 191.437920
## iter  30 value 160.002776
## iter  40 value 141.128098
## iter  50 value 136.372898
## iter  60 value 128.243583
## iter  70 value 127.395994
## iter  80 value 126.962541
## iter  90 value 126.691776
## iter 100 value 126.665930
## final  value 126.665930 
## stopped after 100 iterations
## # weights:  221
## initial  value 3594.508096 
## iter  10 value 155.875304
## iter  20 value 147.750919
## iter  30 value 147.233474
## final  value 147.233451 
## converged
## # weights:  331
## initial  value 11346.401158 
## iter  10 value 1261.155634
## iter  20 value 234.482796
## iter  30 value 157.728370
## iter  40 value 149.114510
## iter  50 value 149.093072
## iter  60 value 147.694964
## iter  70 value 147.238760
## iter  80 value 147.207997
## iter  90 value 137.221416
## iter 100 value 134.855688
## final  value 134.855688 
## stopped after 100 iterations
## # weights:  551
## initial  value 5955.395537 
## iter  10 value 1284.530259
## iter  20 value 1108.383585
## iter  30 value 167.299559
## iter  40 value 141.045737
## iter  50 value 132.956208
## iter  60 value 125.785625
## iter  70 value 118.741038
## iter  80 value 112.880101
## iter  90 value 110.337522
## iter 100 value 110.059933
## final  value 110.059933 
## stopped after 100 iterations
## # weights:  771
## initial  value 4078.741639 
## iter  10 value 184.352920
## iter  20 value 164.487132
## iter  30 value 141.024537
## iter  40 value 135.898757
## iter  50 value 135.333847
## iter  60 value 135.065944
## iter  70 value 135.012249
## iter  80 value 133.208397
## iter  90 value 132.811225
## iter 100 value 132.542635
## final  value 132.542635 
## stopped after 100 iterations
## # weights:  221
## initial  value 7277.681072 
## final  value 5289.067554 
## converged
## # weights:  331
## initial  value 2375.764083 
## iter  10 value 152.299399
## iter  20 value 139.283081
## iter  30 value 138.535381
## iter  40 value 138.452331
## iter  50 value 137.794557
## iter  60 value 137.322953
## iter  70 value 137.316473
## final  value 137.316392 
## converged
## # weights:  551
## initial  value 8107.701183 
## iter  10 value 2481.926506
## iter  20 value 2228.135442
## iter  30 value 179.059568
## iter  40 value 149.800349
## iter  50 value 148.249261
## iter  60 value 148.167834
## iter  70 value 148.164986
## iter  80 value 147.397685
## iter  90 value 146.852759
## iter 100 value 138.775744
## final  value 138.775744 
## stopped after 100 iterations
## # weights:  771
## initial  value 3248.406574 
## iter  10 value 164.645719
## iter  20 value 154.746074
## iter  30 value 149.569158
## iter  40 value 124.585744
## iter  50 value 122.993783
## iter  60 value 122.249542
## iter  70 value 120.929034
## iter  80 value 119.778015
## iter  90 value 119.173984
## iter 100 value 118.981551
## final  value 118.981551 
## stopped after 100 iterations
## # weights:  221
## initial  value 4831.366019 
## iter  10 value 159.649543
## iter  20 value 152.165837
## iter  30 value 152.157330
## iter  40 value 152.155297
## iter  40 value 152.155296
## iter  40 value 152.155296
## final  value 152.155296 
## converged
## # weights:  331
## initial  value 7396.843625 
## iter  10 value 2463.167185
## iter  20 value 729.514344
## iter  30 value 152.528177
## iter  40 value 151.392722
## iter  50 value 150.091777
## iter  60 value 150.048227
## iter  70 value 150.039919
## iter  80 value 150.033212
## iter  90 value 150.017878
## iter 100 value 150.016910
## final  value 150.016910 
## stopped after 100 iterations
## # weights:  551
## initial  value 16915.034203 
## iter  10 value 752.216866
## iter  20 value 148.773141
## iter  30 value 148.587417
## iter  40 value 143.924784
## iter  50 value 140.169230
## iter  60 value 140.115269
## iter  70 value 139.716756
## iter  80 value 137.682901
## iter  90 value 137.158001
## iter 100 value 135.820296
## final  value 135.820296 
## stopped after 100 iterations
## # weights:  771
## initial  value 7981.096380 
## iter  10 value 393.883238
## iter  20 value 385.018614
## iter  30 value 151.684513
## iter  40 value 149.105774
## iter  50 value 148.495400
## iter  60 value 144.066648
## iter  70 value 141.319638
## iter  80 value 140.220873
## iter  90 value 138.690152
## iter 100 value 137.689693
## final  value 137.689693 
## stopped after 100 iterations
## # weights:  221
## initial  value 11031.729530 
## iter  10 value 1196.093223
## iter  20 value 164.338322
## iter  30 value 142.999081
## iter  40 value 132.856462
## iter  50 value 127.788561
## iter  60 value 127.627346
## final  value 127.625925 
## converged
## # weights:  331
## initial  value 16118.653862 
## iter  10 value 1902.207115
## iter  20 value 1726.198084
## iter  30 value 246.984968
## iter  40 value 150.851762
## iter  50 value 140.998849
## iter  60 value 139.281696
## final  value 138.674114 
## converged
## # weights:  551
## initial  value 9114.665553 
## iter  10 value 2264.368277
## iter  20 value 179.679897
## iter  30 value 144.885412
## iter  40 value 139.021383
## iter  50 value 130.078052
## iter  60 value 129.906488
## iter  70 value 129.901615
## iter  80 value 129.113833
## iter  90 value 128.537884
## iter 100 value 128.527035
## final  value 128.527035 
## stopped after 100 iterations
## # weights:  771
## initial  value 14718.897146 
## iter  10 value 3575.512617
## iter  20 value 175.844489
## iter  30 value 140.132871
## iter  40 value 139.780904
## iter  50 value 134.525000
## iter  60 value 128.386603
## iter  70 value 123.424979
## iter  80 value 122.106268
## iter  90 value 121.464634
## iter 100 value 121.442818
## final  value 121.442818 
## stopped after 100 iterations
## # weights:  221
## initial  value 7210.044833 
## final  value 5085.771188 
## converged
## # weights:  331
## initial  value 7840.623688 
## final  value 5267.270212 
## converged
## # weights:  551
## initial  value 9766.210830 
## final  value 5801.465928 
## converged
## # weights:  771
## initial  value 3726.600694 
## iter  10 value 156.985537
## iter  20 value 145.841976
## iter  30 value 137.877349
## iter  40 value 134.690931
## iter  50 value 132.271504
## iter  60 value 131.970327
## iter  70 value 131.737639
## iter  80 value 131.597859
## iter  90 value 131.406544
## iter 100 value 131.226594
## final  value 131.226594 
## stopped after 100 iterations
## # weights:  221
## initial  value 8557.747268 
## iter  10 value 1115.936081
## iter  20 value 171.405250
## iter  30 value 150.476714
## iter  40 value 150.366741
## iter  50 value 146.091929
## final  value 146.091313 
## converged
## # weights:  331
## initial  value 7840.229346 
## iter  10 value 3125.471990
## iter  20 value 165.025097
## iter  30 value 149.651050
## iter  40 value 140.373153
## iter  50 value 133.566021
## iter  60 value 131.890493
## iter  70 value 129.795329
## iter  80 value 128.192149
## iter  90 value 127.986255
## iter 100 value 127.977585
## final  value 127.977585 
## stopped after 100 iterations
## # weights:  551
## initial  value 2820.838170 
## iter  10 value 183.534617
## iter  20 value 158.604882
## iter  30 value 151.101081
## iter  40 value 134.003396
## iter  50 value 133.852541
## iter  60 value 133.838889
## iter  70 value 133.838061
## final  value 133.838052 
## converged
## # weights:  771
## initial  value 3323.601046 
## iter  10 value 143.718280
## iter  20 value 141.750196
## iter  30 value 141.727870
## iter  40 value 141.713513
## iter  50 value 141.673753
## iter  60 value 141.514376
## iter  70 value 141.210279
## iter  80 value 137.529991
## iter  90 value 131.693695
## iter 100 value 129.787842
## final  value 129.787842 
## stopped after 100 iterations
## # weights:  221
## initial  value 16601.226835 
## iter  10 value 235.554930
## iter  20 value 222.178009
## iter  30 value 218.092472
## final  value 217.850120 
## converged
Adult_TDA_PC_5.50.5_n5_NN1Fit0
## Neural Network 
## 
## 14404 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9603, 9603, 9602 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa
##   2     0.3    0.9979867  0    
##   2     0.5    0.9979867  0    
##   2     0.7    0.9979867  0    
##   3     0.3    0.9979867  0    
##   3     0.5    0.9979867  0    
##   3     0.7    0.9979867  0    
##   5     0.3    0.9979867  0    
##   5     0.5    0.9979867  0    
##   5     0.7    0.9979867  0    
##   7     0.3    0.9979867  0    
##   7     0.5    0.9979867  0    
##   7     0.7    0.9979867  0    
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.7.
Adult_TDA_PC_5.50.5_n5_NN1Fit0$resample
##    Accuracy Kappa Resample
## 1 0.9979175     0    Fold3
## 2 0.9981254     0    Fold2
## 3 0.9979171     0    Fold1
# Keep the per-fold cross-validation accuracies as a one-column data frame;
# they feed the fold-wise comparison further down.
ad_tda_pc_5.50.5_n5_nn1_fit_re <-
  Adult_TDA_PC_5.50.5_n5_NN1Fit0$resample["Accuracy"]

# Print the architecture and weights of the final fitted network.
summary(Adult_TDA_PC_5.50.5_n5_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting  decay=0.7
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o 
## -2.04 -2.04 -2.04
# Variable-importance plot for the TDA-assisted network, limited to the top
# 25 features. The title previously read "Assited"; corrected to "Assisted".
vip(Adult_TDA_PC_5.50.5_n5_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n5_NN1Fit TDA-Assisted NN")

# Score the held-out test set with the model fitted on the training data.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n5_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the true test labels, then print the
# confusion matrix and its summary statistics.
ad_tda_pc_5.50.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n5_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n5_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Store the overall test-set accuracy as a named length-one vector.
ad_tda_pc_5.50.5_n5_nn1_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n5_nn1_cf0$overall["Accuracy"]

# Show the per-class statistics.
ad_tda_pc_5.50.5_n5_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Keep precision, recall and F1 (elements 5 to 7 of byClass), selected by name.
ad_tda_pc_5.50.5_n5_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n5_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Fold-wise accuracy difference: ad_nn1_fit_re (presumably the non-TDA-assisted
# NN's fold accuracies) minus the TDA-assisted NN's fold accuracies, so a
# negative value means the TDA-assisted model scored higher on that row.
# NOTE(review): rows are subtracted by position; confirm that both $resample
# tables list the folds in the same order.
diff_tda_pca_5.50.5_nn1_n5_3_fold<-(ad_nn1_fit_re - ad_tda_pc_5.50.5_n5_nn1_fit_re)
diff_tda_pca_5.50.5_nn1_n5_3_fold
##     Accuracy
## 1 -0.1692784
## 2 -0.1518703
## 3 -0.1909942
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# Applied to the fold-wise accuracy differences; the result is a list with
# probLeft, probRope and probRight.
# NOTE(review): the trailing arguments (-0.01, 0.01) look like the lower and
# upper ROPE bounds; confirm against the BayesianSignTest definition.

bst_tda_pca_5.50.5_nn1.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight from the sign test above. The division is
# unguarded, so the result is Inf whenever probRight is 0.

bst_tda_pca_5.50.5_nn1.n5_3_fold_odds.left<-bst_tda_pca_5.50.5_nn1.n5_3_fold$probLeft/bst_tda_pca_5.50.5_nn1.n5_3_fold$probRight
bst_tda_pca_5.50.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Applied to the same fold-wise accuracy differences; the result is a list
# with winLeft, winRope and winRight.
# NOTE(review): (-0.01, 0.01) are presumably the ROPE bounds; confirm against
# the BayesianSignedRank definition.

bsr_tda_pca_5.50.5_nn1.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9905333
## 
## $winRope
## [1] 0.009466667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Applied to the fold-wise accuracy differences; the result is a list with
# left, rope and right.
# NOTE(review): the second argument (0.1) is presumably the assumed correlation
# between resamples and (-0.01, 0.01) the ROPE bounds; confirm against the
# correlatedBayesianTtest definition.

bct_tda_pca_5.50.5_nn1.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n5_3_fold
## $left
## [1] 0.9967268
## 
## $rope
## [1] 0.0006790979
## 
## $right
## [1] 0.002594112
# ROPE plot of the fold-wise accuracy differences over [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.50.5_nn1_n5_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold))
#bf_tda_pca_5.50.5_nn1.n5_3_fold

# t_test
# One-sample t-test of the fold-wise accuracy differences against a mean of 0.
# There are only three folds, so the test has 2 degrees of freedom.
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold)
## t = -15.085, df = 2, p-value = 0.004366
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2194070 -0.1220216
## sample estimates:
##  mean of x 
## -0.1707143
### Test set diff
# Difference in held-out test accuracy: nn1_cf_ov_acc minus the TDA-assisted
# model's overall accuracy. This is a single number, not a vector of paired
# results.
diff_tda_pca_5.50.5_nn1.n5_test<-(nn1_cf_ov_acc - ad_tda_pc_5.50.5_n5_nn1_cf0_ov_acc)
diff_tda_pca_5.50.5_nn1.n5_test
##   Accuracy 
## 0.04463554
## Bayesian Tests Test set diff

# Bayesian Sign Test
# The input is the single test-set accuracy difference (one observation), so
# read the resulting probabilities with caution.

bst_tda_pca_5.50.5_nn1.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1.n5_test),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight from the test-set sign test; it is 0 whenever
# probLeft is 0 and probRight is positive.

bst_tda_pca_5.50.5_nn1.n5_test_odds.left<-bst_tda_pca_5.50.5_nn1.n5_test$probLeft/bst_tda_pca_5.50.5_nn1.n5_test$probRight
bst_tda_pca_5.50.5_nn1.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# The input is the single test-set accuracy difference (one observation).

bsr_tda_pca_5.50.5_nn1.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1.n5_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1599
## 
## $winRight
## [1] 0.8401
# Bayesian Correlated Test
# The input is the single test-set accuracy difference, and left, rope and
# right all come back as NA.
# NOTE(review): presumably because a sample variance cannot be computed from
# one value; confirm against the correlatedBayesianTtest definition.

bct_tda_pca_5.50.5_nn1.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nn1.n5_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n5_test))
#bf_tda_pca_5.50.5_nn1.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n5_test))


##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node1

# Neural Network 1
# Fit an nnet classifier for the income class (adult_df1) on the TDA KDE
# node-1 subset. Resampling comes from fitControl, the candidate size/decay
# values from nn1Grid, and the final model is selected by accuracy.
Adult_TDA_KDE_5.50.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 6149.257507 
## iter  10 value 5108.018332
## iter  20 value 4701.587790
## iter  30 value 4666.466015
## iter  40 value 4611.630260
## iter  50 value 4500.538714
## iter  60 value 4468.956661
## iter  70 value 4433.934881
## iter  80 value 4395.204283
## iter  90 value 4381.527680
## iter 100 value 4367.839204
## final  value 4367.839204 
## stopped after 100 iterations
## # weights:  331
## initial  value 5679.128402 
## iter  10 value 5069.737704
## iter  20 value 4676.178389
## iter  30 value 4652.939849
## iter  40 value 4646.944859
## iter  50 value 4585.979057
## iter  60 value 4543.743732
## iter  70 value 4525.201564
## iter  80 value 4510.096137
## iter  90 value 4504.194642
## iter 100 value 4162.779379
## final  value 4162.779379 
## stopped after 100 iterations
## # weights:  551
## initial  value 10235.010522 
## iter  10 value 5045.386653
## iter  20 value 4986.907411
## iter  30 value 4953.260081
## iter  40 value 4907.668238
## iter  50 value 4905.538685
## iter  60 value 4584.030818
## iter  70 value 4541.912777
## iter  80 value 4512.474436
## iter  90 value 4501.659006
## iter 100 value 4494.602528
## final  value 4494.602528 
## stopped after 100 iterations
## # weights:  771
## initial  value 11318.371368 
## iter  10 value 4990.763017
## iter  20 value 4655.244236
## iter  30 value 4516.639038
## iter  40 value 4502.771933
## iter  50 value 4493.240297
## iter  60 value 4491.945598
## iter  70 value 4423.030165
## iter  80 value 4307.274819
## iter  90 value 4144.973196
## iter 100 value 3840.732895
## final  value 3840.732895 
## stopped after 100 iterations
## # weights:  221
## initial  value 5520.959303 
## iter  10 value 5108.114505
## iter  20 value 5108.058151
## iter  30 value 5089.052374
## iter  40 value 4771.188428
## iter  50 value 4615.340975
## iter  60 value 4543.117500
## iter  70 value 4486.918050
## iter  80 value 4388.467045
## iter  90 value 3694.932972
## iter 100 value 3200.524854
## final  value 3200.524854 
## stopped after 100 iterations
## # weights:  331
## initial  value 6710.880706 
## iter  10 value 4896.023460
## iter  20 value 4647.248076
## iter  30 value 4602.133160
## iter  40 value 4548.314515
## iter  50 value 4523.473325
## iter  60 value 4515.536828
## iter  70 value 4512.185141
## iter  80 value 4511.321099
## iter  90 value 4430.075888
## iter 100 value 4363.506317
## final  value 4363.506317 
## stopped after 100 iterations
## # weights:  551
## initial  value 6562.785894 
## iter  10 value 4861.429721
## iter  20 value 4718.997443
## iter  30 value 4608.132469
## iter  40 value 4537.775361
## iter  50 value 4532.561750
## iter  60 value 4525.383242
## iter  70 value 4522.510067
## iter  80 value 4521.112476
## iter  90 value 4499.717359
## iter 100 value 4430.437803
## final  value 4430.437803 
## stopped after 100 iterations
## # weights:  771
## initial  value 5483.256670 
## iter  10 value 4916.368015
## iter  20 value 4684.151027
## iter  30 value 4659.355482
## iter  40 value 4649.167432
## iter  50 value 4597.285065
## iter  60 value 4512.612367
## iter  70 value 4508.300699
## iter  80 value 4495.659100
## iter  90 value 4351.693676
## iter 100 value 3920.617188
## final  value 3920.617188 
## stopped after 100 iterations
## # weights:  221
## initial  value 7712.866670 
## iter  10 value 5057.826841
## iter  20 value 4819.834152
## iter  30 value 4645.605327
## iter  40 value 4630.552478
## iter  50 value 4626.996150
## iter  50 value 4626.996122
## iter  60 value 4626.354827
## iter  60 value 4626.354814
## iter  60 value 4626.354783
## final  value 4626.354783 
## converged
## # weights:  331
## initial  value 7011.132706 
## iter  10 value 4768.359414
## iter  20 value 4645.994592
## iter  30 value 4638.213370
## iter  40 value 4635.561297
## iter  50 value 4631.813245
## iter  60 value 4586.973308
## iter  70 value 4529.990823
## iter  80 value 4187.960908
## iter  90 value 3799.771722
## iter 100 value 3564.169943
## final  value 3564.169943 
## stopped after 100 iterations
## # weights:  551
## initial  value 7591.221582 
## iter  10 value 4990.106659
## iter  20 value 4574.258368
## iter  30 value 4527.467275
## iter  40 value 4387.813075
## iter  50 value 4258.403410
## iter  60 value 3991.814086
## iter  70 value 3706.095815
## iter  80 value 3586.852956
## iter  90 value 3034.743639
## iter 100 value 2979.883890
## final  value 2979.883890 
## stopped after 100 iterations
## # weights:  771
## initial  value 5610.548217 
## iter  10 value 5106.354204
## iter  20 value 4726.773647
## iter  30 value 4646.696782
## iter  40 value 4629.275668
## iter  50 value 4627.532491
## iter  60 value 4598.061086
## iter  70 value 4517.098927
## iter  80 value 4505.942250
## iter  90 value 4499.452091
## iter 100 value 4429.505364
## final  value 4429.505364 
## stopped after 100 iterations
## # weights:  221
## initial  value 5297.241595 
## iter  10 value 5106.248258
## iter  20 value 4955.685843
## iter  30 value 4675.760200
## iter  40 value 4589.422605
## iter  50 value 4513.516405
## iter  60 value 4487.658854
## iter  70 value 4438.309984
## iter  80 value 4394.948858
## iter  90 value 4382.588040
## iter 100 value 4371.694577
## final  value 4371.694577 
## stopped after 100 iterations
## # weights:  331
## initial  value 5183.466604 
## iter  10 value 4716.022050
## iter  20 value 4678.910589
## iter  30 value 4677.504934
## iter  40 value 4676.809908
## iter  50 value 4668.693443
## iter  60 value 4652.850609
## iter  70 value 4611.124336
## iter  80 value 4587.011241
## iter  90 value 4567.034426
## iter 100 value 4506.136705
## final  value 4506.136705 
## stopped after 100 iterations
## # weights:  551
## initial  value 5939.145444 
## iter  10 value 4772.120073
## iter  20 value 4625.157925
## iter  30 value 4616.410300
## iter  40 value 4596.744362
## iter  50 value 4568.508404
## iter  60 value 4549.664495
## iter  70 value 4543.627994
## iter  80 value 4516.806760
## iter  90 value 4465.065662
## iter 100 value 4444.147983
## final  value 4444.147983 
## stopped after 100 iterations
## # weights:  771
## initial  value 6414.094798 
## iter  10 value 4691.277109
## iter  20 value 4650.941967
## iter  30 value 4628.268867
## iter  40 value 4576.166580
## iter  50 value 4486.536580
## iter  60 value 4464.902834
## iter  70 value 4454.541743
## iter  80 value 4449.511660
## iter  90 value 4416.328082
## iter 100 value 4399.814355
## final  value 4399.814355 
## stopped after 100 iterations
## # weights:  221
## initial  value 7356.658578 
## iter  10 value 4764.319945
## iter  20 value 4587.245909
## iter  30 value 4393.915676
## iter  40 value 4064.333946
## iter  50 value 3882.195932
## iter  60 value 3511.096413
## iter  70 value 3239.811744
## iter  80 value 3033.924748
## iter  90 value 2996.109066
## iter 100 value 2964.376398
## final  value 2964.376398 
## stopped after 100 iterations
## # weights:  331
## initial  value 9729.059132 
## iter  10 value 4799.005109
## iter  20 value 4649.331267
## iter  30 value 4583.841428
## iter  40 value 4543.288877
## iter  50 value 4503.870606
## iter  60 value 4493.471910
## final  value 4492.872494 
## converged
## # weights:  551
## initial  value 4978.033018 
## iter  10 value 4854.813149
## iter  20 value 4615.115602
## iter  30 value 4555.157353
## iter  40 value 4517.198474
## iter  50 value 4509.925321
## iter  60 value 4506.732058
## iter  70 value 4482.024381
## iter  80 value 4433.567472
## iter  90 value 4361.271870
## iter 100 value 4037.976597
## final  value 4037.976597 
## stopped after 100 iterations
## # weights:  771
## initial  value 5419.739649 
## iter  10 value 4960.641659
## iter  20 value 4616.064063
## iter  30 value 4562.225406
## iter  40 value 4536.947597
## iter  50 value 4406.089299
## iter  60 value 4311.050253
## iter  70 value 4260.917859
## iter  80 value 4070.925039
## iter  90 value 3998.172367
## iter 100 value 3515.750289
## final  value 3515.750289 
## stopped after 100 iterations
## # weights:  221
## initial  value 5458.777873 
## iter  10 value 5103.378366
## iter  20 value 5099.680081
## iter  30 value 4711.987731
## iter  40 value 4690.200488
## iter  50 value 4612.666320
## iter  60 value 4551.924312
## iter  70 value 4479.295483
## iter  80 value 4463.728063
## iter  90 value 4442.810002
## iter 100 value 4378.049218
## final  value 4378.049218 
## stopped after 100 iterations
## # weights:  331
## initial  value 7462.260623 
## iter  10 value 5113.281040
## iter  20 value 5112.419030
## iter  30 value 4965.240163
## iter  40 value 4604.816908
## iter  50 value 4020.046490
## iter  60 value 3590.729382
## iter  70 value 3303.711063
## iter  80 value 3090.466739
## iter  90 value 2998.550633
## iter 100 value 2899.911776
## final  value 2899.911776 
## stopped after 100 iterations
## # weights:  551
## initial  value 8637.078222 
## iter  10 value 4945.286979
## iter  20 value 4564.542715
## iter  30 value 4550.576082
## iter  40 value 4534.179443
## iter  50 value 4520.269648
## iter  60 value 4511.053179
## iter  70 value 4510.498167
## iter  80 value 4507.017650
## iter  90 value 4500.673922
## iter 100 value 4440.085325
## final  value 4440.085325 
## stopped after 100 iterations
## # weights:  771
## initial  value 8162.037620 
## iter  10 value 5113.889669
## iter  20 value 4731.761823
## iter  30 value 4660.539972
## iter  40 value 4634.191853
## iter  50 value 4571.634155
## iter  60 value 4527.078527
## iter  70 value 4479.745544
## iter  80 value 4474.089820
## iter  90 value 4467.439895
## iter 100 value 4459.472910
## final  value 4459.472910 
## stopped after 100 iterations
## # weights:  221
## initial  value 4962.370422 
## iter  10 value 4598.031315
## iter  20 value 4564.615450
## iter  30 value 4527.833510
## iter  40 value 4484.328666
## iter  50 value 4451.501728
## iter  60 value 4382.199237
## iter  70 value 4308.511211
## iter  80 value 4182.339538
## iter  90 value 3672.833858
## iter 100 value 3216.128558
## final  value 3216.128558 
## stopped after 100 iterations
## # weights:  331
## initial  value 7044.793797 
## iter  10 value 4954.378310
## iter  20 value 4792.413478
## iter  30 value 4541.286078
## iter  40 value 4504.711834
## iter  50 value 4489.906554
## iter  60 value 4428.877486
## iter  70 value 4336.139257
## iter  80 value 4173.775202
## iter  90 value 3871.665388
## iter 100 value 3538.236733
## final  value 3538.236733 
## stopped after 100 iterations
## # weights:  551
## initial  value 6602.027091 
## iter  10 value 5106.506349
## iter  20 value 5106.482076
## iter  30 value 5083.413279
## iter  40 value 4760.366516
## iter  50 value 4638.425011
## iter  60 value 4623.685584
## iter  70 value 4607.771689
## iter  80 value 4589.913724
## iter  90 value 4417.725996
## iter 100 value 4382.675172
## final  value 4382.675172 
## stopped after 100 iterations
## # weights:  771
## initial  value 5187.250075 
## iter  10 value 4819.379437
## iter  20 value 4575.529253
## iter  30 value 4433.642407
## iter  40 value 4414.184861
## iter  50 value 4396.438547
## iter  60 value 4384.625017
## iter  70 value 4382.013948
## iter  80 value 4380.106629
## iter  90 value 4369.364028
## iter 100 value 4359.055680
## final  value 4359.055680 
## stopped after 100 iterations
## # weights:  221
## initial  value 5611.099438 
## iter  10 value 4704.032286
## iter  20 value 4610.244721
## iter  30 value 4538.297049
## iter  40 value 4425.365191
## iter  50 value 4394.450964
## iter  60 value 4377.554494
## iter  70 value 4213.687303
## iter  80 value 3995.268979
## iter  90 value 3399.400671
## iter 100 value 3051.919931
## final  value 3051.919931 
## stopped after 100 iterations
## # weights:  331
## initial  value 5534.033510 
## iter  10 value 5107.713154
## iter  20 value 5106.659772
## iter  30 value 5106.647626
## iter  40 value 4608.839973
## iter  50 value 4477.289871
## iter  60 value 4457.836667
## iter  70 value 4405.385054
## iter  80 value 4399.868443
## iter  90 value 3777.697981
## iter 100 value 3154.410301
## final  value 3154.410301 
## stopped after 100 iterations
## # weights:  551
## initial  value 5663.189802 
## iter  10 value 4982.783056
## iter  20 value 4947.235604
## iter  30 value 4610.671218
## iter  40 value 4521.837737
## iter  50 value 4506.276454
## iter  60 value 4500.294131
## iter  70 value 4482.275642
## iter  80 value 4478.956429
## iter  90 value 4471.335274
## iter 100 value 4469.224863
## final  value 4469.224863 
## stopped after 100 iterations
## # weights:  771
## initial  value 5214.844903 
## iter  10 value 4845.890369
## iter  20 value 4580.797245
## iter  30 value 4531.100914
## iter  40 value 4492.796003
## iter  50 value 4443.191170
## iter  60 value 4428.200236
## iter  70 value 4400.691902
## iter  80 value 4386.248316
## iter  90 value 4385.691119
## iter 100 value 3637.756318
## final  value 3637.756318 
## stopped after 100 iterations
## # weights:  221
## initial  value 6354.505278 
## iter  10 value 4954.238186
## iter  20 value 4642.413217
## iter  30 value 4623.786662
## iter  40 value 4574.684848
## iter  50 value 4464.538142
## iter  60 value 4451.843182
## iter  70 value 4089.929540
## iter  80 value 3868.273933
## iter  90 value 3455.307673
## iter 100 value 3283.861911
## final  value 3283.861911 
## stopped after 100 iterations
## # weights:  331
## initial  value 5178.842939 
## iter  10 value 4982.434083
## iter  20 value 4787.135464
## iter  30 value 4572.976882
## iter  40 value 4413.203965
## iter  50 value 4335.943926
## iter  60 value 4182.315784
## iter  70 value 3235.333952
## iter  80 value 3110.375400
## iter  90 value 3086.575699
## iter 100 value 2871.844382
## final  value 2871.844382 
## stopped after 100 iterations
## # weights:  551
## initial  value 6566.047250 
## iter  10 value 5101.035299
## iter  20 value 4663.743917
## iter  30 value 4624.192993
## iter  40 value 4518.139519
## iter  50 value 4449.580502
## iter  60 value 4406.523765
## iter  70 value 4358.334112
## iter  80 value 4328.920748
## iter  90 value 4124.631703
## iter 100 value 4053.940115
## final  value 4053.940115 
## stopped after 100 iterations
## # weights:  771
## initial  value 11415.967320 
## iter  10 value 4711.374071
## iter  20 value 4552.640612
## iter  30 value 4508.286388
## iter  40 value 4451.782833
## iter  50 value 4407.997159
## iter  60 value 4384.536901
## iter  70 value 4374.365819
## iter  80 value 4373.127890
## iter  90 value 4365.791917
## iter 100 value 4358.501380
## final  value 4358.501380 
## stopped after 100 iterations
## # weights:  331
## initial  value 8157.908702 
## iter  10 value 7660.646412
## iter  20 value 7600.615456
## iter  30 value 6969.887234
## iter  40 value 6964.516545
## iter  50 value 6883.570813
## iter  60 value 6873.163230
## iter  70 value 6861.552904
## iter  80 value 6791.667125
## iter  90 value 6448.674621
## iter 100 value 6014.759468
## final  value 6014.759468 
## stopped after 100 iterations
Adult_TDA_KDE_5.50.5_n1_NN1Fit0
## Neural Network 
## 
## 13387 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8925, 8925, 8924 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.8133244  0.4314430
##   2     0.5    0.8448499  0.5951524
##   2     0.7    0.8092908  0.4022641
##   3     0.3    0.8101874  0.4301478
##   3     0.5    0.8069748  0.4054663
##   3     0.7    0.8476133  0.5887745
##   5     0.3    0.7923358  0.3360933
##   5     0.5    0.7888999  0.2876728
##   5     0.7    0.8154193  0.4151006
##   7     0.3    0.7923356  0.3219716
##   7     0.5    0.8154940  0.4662657
##   7     0.7    0.7992084  0.3512407
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.7.
Adult_TDA_KDE_5.50.5_n1_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8478602 0.5860868    Fold3
## 2 0.8655312 0.6414787    Fold2
## 3 0.8294487 0.5387579    Fold1
# Keep the per-fold cross-validation accuracies as a one-column data frame.
ad_tda_kde_5.50.5_n1_nn1_fit_re <-
  Adult_TDA_KDE_5.50.5_n1_NN1Fit0$resample["Accuracy"]

# Print the architecture and weights of the final fitted network.
summary(Adult_TDA_KDE_5.50.5_n1_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.7
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     1.00     2.03    -0.41    -0.41    -1.58     0.00     3.62     0.06 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.46    -0.74     0.00     0.00     0.03     1.24     0.03     0.08 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.46    -0.03    -0.58     0.93    -0.14     0.81    -0.94    -0.25 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##    -1.44     0.00     0.96    -0.16     3.09    -0.82     0.00     2.12 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.04    -1.87     1.51     0.03    -0.41     1.29     0.01    -0.09 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.11    -0.44     0.05     0.99    -0.11    -0.01    -1.15     0.28 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.90    -0.27    -0.16     1.61    -1.46     0.75    -0.31    -0.01 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.42    -0.05    -0.19     0.47     0.05     0.73    -1.04     2.04 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##    -0.08    -0.19     0.52     1.02     0.00    -0.12     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.09     0.00     0.00    -0.23    -0.05     0.00    -0.18     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##    -0.01     0.00     0.00     0.00     0.00     0.00    -0.16     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.06     0.00     0.63     0.01     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.04     0.00     0.04     0.00     0.43    -0.17 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00    -0.42     0.02     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##    -4.32     0.09    -1.02    -0.10    -1.47    -0.02     0.31    -1.30 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##    -0.39    -0.37     0.04     0.00    -1.32    -2.77    -0.46    -0.15 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##    -0.04    -1.26    -0.38    -0.10     0.88    -0.38     1.89    -1.08 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -0.07     0.25     1.69    -1.01     0.21    -0.95    -0.15     1.63 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##    -0.59    -7.30     1.98     1.07    -1.04    -0.25    -0.30    -0.08 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     1.21    -0.45    -2.47    -1.18    -1.22    -0.53     0.21     0.37 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.04    -0.34     1.71    -2.51     1.36    -0.42    -6.15    -0.40 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     3.81    -0.06    -0.05    -2.54    -0.55    -1.12    -3.58    -0.73 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.18    -1.09    -0.12    -0.14    -0.11     0.28 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##    -0.01     0.19     0.04     0.20     0.03     0.02    -0.05    -0.25 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.03    -0.32    -0.06    -0.01     0.05    -0.09     0.60     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.01     0.77    -0.20    -0.08    -0.17    -0.55     0.09     0.05 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##    -0.26    -0.75    -0.03     0.04    -0.31    -0.11     0.57     0.58 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##    -0.01     0.08    -3.16    -0.08    -0.02 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.01     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o 
## -2.22 -1.85  2.90  0.10
# Variable-importance plot for the n1 TDA-assisted network, showing up to 50
# features. The title previously misspelled "Assisted".
vip(Adult_TDA_KDE_5.50.5_n1_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n1_NN1Fit TDA-Assisted NN")

# Score the test data with the n1 TDA-assisted network.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n1_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted against observed labels on the test data, then
# print the resulting confusion matrix and its summary statistics.
ad_tda_kde_5.50.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n1_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7211  1660
##      >50K     205   692
##                                           
##                Accuracy : 0.8091          
##                  95% CI : (0.8011, 0.8168)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.338           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9724          
##             Specificity : 0.2942          
##          Pos Pred Value : 0.8129          
##          Neg Pred Value : 0.7715          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7382          
##    Detection Prevalence : 0.9082          
##       Balanced Accuracy : 0.6333          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): this prints the same confusion-matrix object that was already
# printed right after it was created; the repeated output looks redundant.
ad_tda_kde_5.50.5_n1_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7211  1660
##      >50K     205   692
##                                           
##                Accuracy : 0.8091          
##                  95% CI : (0.8011, 0.8168)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.338           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9724          
##             Specificity : 0.2942          
##          Pos Pred Value : 0.8129          
##          Neg Pred Value : 0.7715          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7382          
##    Detection Prevalence : 0.9082          
##       Balanced Accuracy : 0.6333          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the confusion matrix.
ad_tda_kde_5.50.5_n1_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.090704e-01   3.379551e-01   8.011333e-01   8.168229e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.647199e-32  1.666515e-248
# Keep the test accuracy as a named length-1 vector. It is selected by name
# instead of by position so the extraction cannot silently pick up a
# different statistic.
ad_tda_kde_5.50.5_n1_nn1_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n1_nn1_cf0$overall["Accuracy"]
# Per-class statistics of the confusion matrix.
ad_tda_kde_5.50.5_n1_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9723571            0.2942177            0.8128734 
##       Neg Pred Value            Precision               Recall 
##            0.7714604            0.8128734            0.9723571 
##                   F1           Prevalence       Detection Rate 
##            0.8854915            0.7592138            0.7382269 
## Detection Prevalence    Balanced Accuracy 
##            0.9081695            0.6332874
# Keep precision, recall and F1 (elements 5 to 7 of `byClass`), selected by
# name for the same reason.
ad_tda_kde_5.50.5_n1_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n1_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Row-wise difference of per-fold accuracies: baseline NN minus the n1
# TDA-assisted NN. Negative values mean the TDA-assisted model scored higher.
diff_tda_kde_5.50.5_nn1_n1_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.50.5_n1_nn1_fit_re
diff_tda_kde_5.50.5_nn1_n1_3_fold
##      Accuracy
## 1 -0.01922107
## 2 -0.01927605
## 3 -0.02252580
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold differences, with -0.01 and 0.01 passed as
# the two bounds.
bst_tda_kde_5.50.5_nn1.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test. The ratio is Inf
# whenever probRight is exactly 0, as in the output below.
bst_tda_kde_5.50.5_nn1.n1_3_fold_odds.left <-
  bst_tda_kde_5.50.5_nn1.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n1_3_fold[["probRight"]]
bst_tda_kde_5.50.5_nn1.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same differences and bounds.
bsr_tda_kde_5.50.5_nn1.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n1_3_fold
## $winLeft
## [1] 0.9093333
## 
## $winRope
## [1] 0.09066667
## 
## $winRight
## [1] 0
# Bayesian correlated t-test; 0.1 is passed ahead of the two bounds.
bct_tda_kde_5.50.5_nn1.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n1_3_fold
## $left
## [1] 0.9927207
## 
## $rope
## [1] 0.006417089
## 
## $right
## [1] 0.0008621679
# ROPE plot of the fold differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.50.5_nn1_n1_3_fold, range = c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_kde_5.50.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold))
#bf_tda_kde_5.50.5_nn1.n1_3_fold

# One-sample t-test of the fold differences.
t.test(as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold)
## t = -18.618, df = 2, p-value = 0.002872
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.02504175 -0.01564020
## sample estimates:
##   mean of x 
## -0.02034097
### Test set diff
# Difference between a baseline test accuracy and the test accuracy of the
# n1 TDA-assisted network.
# NOTE(review): the baseline here is `svm_cf_ov_acc`, while the 3-fold
# comparison for this same model subtracts from `ad_nn1_fit_re`. This looks
# like a leftover from an SVM section; presumably the non-TDA NN test accuracy
# was intended -- confirm before interpreting the results below.
diff_tda_kde_5.50.5_nn1.n1_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n1_nn1_cf0_ov_acc)
diff_tda_kde_5.50.5_nn1.n1_test
##   Accuracy 
## 0.01873464
## Bayesian Tests Test set diff

# Bayesian Sign Test
# The input is a single accuracy difference, with -0.01 and 0.01 passed as
# the two bounds.

bst_tda_kde_5.50.5_nn1.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nn1.n1_test),-0.01,0.01)
bst_tda_kde_5.50.5_nn1.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of the "left" to the "right" probability from the sign test.

bst_tda_kde_5.50.5_nn1.n1_test_odds.left<-bst_tda_kde_5.50.5_nn1.n1_test$probLeft/bst_tda_kde_5.50.5_nn1.n1_test$probRight
bst_tda_kde_5.50.5_nn1.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nn1.n1_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nn1.n1_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nn1.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4610333
## 
## $winRight
## [1] 0.5389667
# Bayesian Correlated Test
# NOTE(review): all three results print as NA. Presumably this is because a
# single difference gives the test no variance to estimate -- confirm, and
# consider whether this test is meaningful on one observation.

bct_tda_kde_5.50.5_nn1.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
# NOTE(review): the disabled call below has one closing parenthesis too many
# and, unlike the 3-fold version, passes no range; fix both before enabling it.
#plot(rope(diff_tda_kde_5.50.5_nn1.n1_test)))

#BayesFactor
# NOTE(review): the trailing comment on the next line mentions a `pca` object
# name although this section is about `kde`.
#bf_tda_kde_5.50.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n1_test)) #bf_tda_pca_5.50.5_nn1.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n1_test))

##Node2

# Neural Network 1
# Fit an nnet classifier for `adult_df1` on all other columns through caret's
# train(), tuning over `nn1Grid` with the resampling scheme in `fitControl`
# and selecting by Accuracy.
# NOTE(review): this model is named n2, but the training data is the `n3`
# object (`tda.m_kde_adult_5.50.5.n3.vec`) -- confirm this is the intended
# node.
# NOTE(review): `Importance` does not look like a documented argument of
# train() or nnet(); presumably it is passed through `...` without effect --
# confirm. It is kept so the call is unchanged apart from `T` -> `TRUE`
# (`T` is an ordinary variable that can be reassigned).
Adult_TDA_KDE_5.50.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 4578.969820 
## iter  10 value 4288.745873
## iter  20 value 4246.839192
## iter  30 value 4246.740901
## iter  40 value 4245.403865
## iter  50 value 4224.590291
## iter  60 value 4204.720593
## iter  70 value 4149.236494
## iter  80 value 4049.513720
## iter  90 value 3752.181553
## iter 100 value 3445.782988
## final  value 3445.782988 
## stopped after 100 iterations
## # weights:  331
## initial  value 5693.979874 
## iter  10 value 4244.354724
## iter  20 value 4239.151522
## iter  30 value 4230.921248
## iter  40 value 4210.629995
## iter  50 value 4176.703642
## iter  60 value 4160.871568
## iter  70 value 4147.240264
## iter  80 value 3995.244187
## iter  90 value 3899.968491
## iter 100 value 3343.814248
## final  value 3343.814248 
## stopped after 100 iterations
## # weights:  551
## initial  value 7730.487351 
## iter  10 value 4330.467747
## iter  20 value 4261.026048
## iter  30 value 3858.771649
## iter  40 value 3221.312967
## iter  50 value 3021.503163
## iter  60 value 2892.038878
## iter  70 value 2862.330802
## iter  80 value 2850.099088
## iter  90 value 2822.997495
## iter 100 value 2808.876162
## final  value 2808.876162 
## stopped after 100 iterations
## # weights:  771
## initial  value 5194.587504 
## iter  10 value 4303.955716
## iter  20 value 4165.390389
## iter  30 value 4152.426308
## iter  40 value 4108.177672
## iter  50 value 4002.170100
## iter  60 value 3927.927242
## iter  70 value 3691.644491
## iter  80 value 3025.308673
## iter  90 value 2892.753752
## iter 100 value 2744.469619
## final  value 2744.469619 
## stopped after 100 iterations
## # weights:  221
## initial  value 6642.634879 
## iter  10 value 4569.361917
## iter  20 value 4567.449720
## iter  30 value 4513.267336
## iter  40 value 4260.245410
## iter  50 value 4202.089714
## iter  60 value 4176.341618
## iter  70 value 4147.030489
## iter  80 value 4063.681773
## iter  90 value 3398.968548
## iter 100 value 3112.779898
## final  value 3112.779898 
## stopped after 100 iterations
## # weights:  331
## initial  value 5071.575902 
## iter  10 value 4498.699933
## iter  20 value 4453.523795
## iter  30 value 4271.223046
## iter  40 value 3750.398538
## iter  50 value 3544.801832
## iter  60 value 3303.055411
## iter  70 value 3078.538640
## iter  80 value 2951.693617
## iter  90 value 2949.196222
## iter 100 value 2947.821673
## final  value 2947.821673 
## stopped after 100 iterations
## # weights:  551
## initial  value 5484.606512 
## iter  10 value 4524.432108
## iter  20 value 4262.848689
## iter  30 value 4252.632792
## iter  40 value 4243.994257
## iter  50 value 4228.612963
## iter  60 value 4208.702816
## iter  70 value 4167.429927
## iter  80 value 4065.921635
## iter  90 value 3710.784836
## iter 100 value 3612.164467
## final  value 3612.164467 
## stopped after 100 iterations
## # weights:  771
## initial  value 5369.184103 
## iter  10 value 4580.164420
## iter  20 value 4278.922611
## iter  30 value 4254.623825
## iter  40 value 4250.830269
## iter  50 value 4234.386426
## iter  60 value 4201.921867
## iter  70 value 4174.749047
## iter  80 value 4139.780881
## iter  90 value 4001.149136
## iter 100 value 3883.690978
## final  value 3883.690978 
## stopped after 100 iterations
## # weights:  221
## initial  value 6726.926196 
## iter  10 value 4569.034572
## iter  20 value 4486.922305
## iter  30 value 4269.166351
## iter  40 value 4257.231413
## iter  50 value 4219.891635
## iter  60 value 4177.629928
## iter  70 value 4108.266162
## iter  80 value 4069.752107
## iter  90 value 3916.830866
## iter 100 value 3355.974813
## final  value 3355.974813 
## stopped after 100 iterations
## # weights:  331
## initial  value 4606.624442 
## iter  10 value 4314.641938
## iter  20 value 4260.152681
## iter  30 value 4255.360495
## iter  40 value 4245.081141
## iter  50 value 4237.312191
## iter  60 value 4221.223967
## iter  70 value 3904.060135
## iter  80 value 3673.734791
## iter  90 value 3411.661027
## iter 100 value 3289.864117
## final  value 3289.864117 
## stopped after 100 iterations
## # weights:  551
## initial  value 4810.118216 
## iter  10 value 4571.243437
## iter  20 value 4568.141118
## iter  30 value 4524.480298
## iter  40 value 4262.269239
## iter  50 value 4229.025403
## iter  60 value 4212.563941
## iter  70 value 4124.885284
## iter  80 value 4057.162035
## iter  90 value 4050.114488
## iter 100 value 4048.964096
## final  value 4048.964096 
## stopped after 100 iterations
## # weights:  771
## initial  value 5966.575402 
## iter  10 value 4546.156559
## iter  20 value 4240.230422
## iter  30 value 4219.397148
## iter  40 value 4176.639598
## iter  50 value 4024.789490
## iter  60 value 3629.352093
## iter  70 value 3281.564644
## iter  80 value 3258.818230
## iter  90 value 3146.695987
## iter 100 value 2920.608678
## final  value 2920.608678 
## stopped after 100 iterations
## # weights:  221
## initial  value 6082.897143 
## iter  10 value 4569.484210
## iter  20 value 4371.758210
## iter  30 value 4287.670824
## iter  40 value 4280.231771
## iter  50 value 4278.572182
## iter  60 value 4278.356423
## iter  70 value 4257.569700
## iter  80 value 4182.858220
## iter  90 value 3971.222966
## iter 100 value 3688.484559
## final  value 3688.484559 
## stopped after 100 iterations
## # weights:  331
## initial  value 4589.079698 
## iter  10 value 4569.919276
## iter  20 value 4569.808690
## iter  30 value 4536.658753
## iter  40 value 4536.521322
## iter  50 value 4503.760483
## iter  60 value 4297.566640
## iter  70 value 4233.253911
## iter  80 value 4006.357386
## iter  90 value 3405.208520
## iter 100 value 3039.266450
## final  value 3039.266450 
## stopped after 100 iterations
## # weights:  551
## initial  value 4665.120895 
## iter  10 value 4293.691102
## iter  20 value 4249.108035
## iter  30 value 4220.877492
## iter  40 value 4201.536955
## iter  50 value 4139.504145
## iter  60 value 4131.965089
## iter  70 value 4092.367431
## iter  80 value 3935.228846
## iter  90 value 3910.042256
## iter 100 value 3871.085811
## final  value 3871.085811 
## stopped after 100 iterations
## # weights:  771
## initial  value 6191.154391 
## iter  10 value 4441.373522
## iter  20 value 4309.703771
## iter  30 value 4279.012777
## iter  40 value 4278.798521
## iter  50 value 4278.783768
## final  value 4278.783634 
## converged
## # weights:  221
## initial  value 4697.672237 
## iter  10 value 4405.323486
## iter  20 value 4242.115938
## iter  30 value 4180.131465
## iter  40 value 4097.057325
## iter  50 value 3686.496421
## iter  60 value 3259.979906
## iter  70 value 3083.796546
## iter  80 value 2842.408305
## iter  90 value 2729.397854
## iter 100 value 2663.398083
## final  value 2663.398083 
## stopped after 100 iterations
## # weights:  331
## initial  value 4853.475826 
## iter  10 value 4538.527989
## iter  20 value 4391.126924
## iter  30 value 4263.862816
## iter  40 value 4234.730370
## iter  50 value 4226.156683
## iter  60 value 4220.882356
## iter  70 value 4218.434003
## iter  80 value 4215.463257
## iter  90 value 4105.951487
## iter 100 value 3683.708103
## final  value 3683.708103 
## stopped after 100 iterations
## # weights:  551
## initial  value 5043.045786 
## iter  10 value 4541.415021
## iter  20 value 4303.667664
## iter  30 value 4301.311765
## iter  40 value 4282.372260
## iter  50 value 4281.318458
## iter  60 value 4281.132176
## iter  70 value 4280.828847
## iter  70 value 4280.828823
## iter  80 value 4241.904541
## iter  90 value 4210.155658
## iter 100 value 4188.615895
## final  value 4188.615895 
## stopped after 100 iterations
## # weights:  771
## initial  value 5613.882128 
## iter  10 value 4565.476416
## iter  20 value 4290.525555
## iter  30 value 4277.409564
## iter  40 value 4276.083084
## iter  50 value 4275.418387
## iter  60 value 4274.370834
## iter  70 value 4274.331841
## final  value 4274.331766 
## converged
## # weights:  221
## initial  value 5096.417925 
## iter  10 value 4468.472505
## iter  20 value 4288.417144
## iter  30 value 4160.943101
## iter  40 value 4110.328589
## iter  50 value 3878.107189
## iter  60 value 3524.252581
## iter  70 value 3314.222921
## iter  80 value 3210.598780
## iter  90 value 3116.486936
## iter 100 value 2876.159683
## final  value 2876.159683 
## stopped after 100 iterations
## # weights:  331
## initial  value 6247.830999 
## iter  10 value 4536.485099
## iter  20 value 4298.207520
## iter  30 value 4297.679990
## iter  40 value 4297.168194
## iter  50 value 4285.037129
## iter  60 value 4272.829696
## iter  70 value 4268.000758
## iter  80 value 4209.725975
## iter  90 value 3938.353915
## iter 100 value 3757.476560
## final  value 3757.476560 
## stopped after 100 iterations
## # weights:  551
## initial  value 4635.780563 
## iter  10 value 4564.240470
## iter  20 value 4529.898477
## iter  30 value 4332.144270
## iter  40 value 4283.658219
## iter  50 value 4278.492498
## iter  60 value 4277.923064
## iter  70 value 4277.643637
## final  value 4277.640515 
## converged
## # weights:  771
## initial  value 4675.268495 
## iter  10 value 4497.612370
## iter  20 value 4303.203308
## iter  30 value 4290.362127
## iter  40 value 4264.846801
## iter  50 value 4241.062487
## iter  60 value 3925.587195
## iter  70 value 3592.868641
## iter  80 value 3571.832761
## iter  90 value 3529.808652
## iter 100 value 3261.509326
## final  value 3261.509326 
## stopped after 100 iterations
## # weights:  221
## initial  value 5152.732780 
## iter  10 value 4570.130857
## iter  10 value 4570.130856
## iter  10 value 4570.130849
## final  value 4570.130849 
## converged
## # weights:  331
## initial  value 6274.913787 
## final  value 4570.084030 
## converged
## # weights:  551
## initial  value 4971.650025 
## iter  10 value 4451.229253
## iter  20 value 4190.231978
## iter  30 value 4158.482257
## iter  40 value 4140.352513
## iter  50 value 4139.252740
## iter  60 value 4125.705688
## iter  70 value 4109.670621
## iter  80 value 4088.026722
## iter  90 value 4029.419780
## iter 100 value 3995.822357
## final  value 3995.822357 
## stopped after 100 iterations
## # weights:  771
## initial  value 9656.387151 
## iter  10 value 4286.846644
## iter  20 value 4252.005534
## iter  30 value 4146.617978
## iter  40 value 4141.978400
## iter  50 value 4138.751127
## iter  60 value 4125.463976
## iter  70 value 4111.886708
## iter  80 value 4052.442984
## iter  90 value 3933.545079
## iter 100 value 3560.486266
## final  value 3560.486266 
## stopped after 100 iterations
## # weights:  221
## initial  value 4993.047106 
## iter  10 value 4570.408984
## iter  20 value 4567.654456
## iter  30 value 4407.216605
## iter  40 value 4179.709063
## iter  50 value 4167.702600
## iter  60 value 4149.429273
## iter  70 value 4108.165439
## iter  80 value 4084.614907
## iter  90 value 3413.546424
## iter 100 value 3119.924301
## final  value 3119.924301 
## stopped after 100 iterations
## # weights:  331
## initial  value 5126.899665 
## iter  10 value 4546.643758
## iter  20 value 4545.789459
## iter  30 value 4507.006901
## iter  40 value 4143.564367
## iter  50 value 4121.277516
## iter  60 value 4039.227410
## iter  70 value 3796.556011
## iter  80 value 3584.281280
## iter  90 value 3166.228602
## iter 100 value 2865.244243
## final  value 2865.244243 
## stopped after 100 iterations
## # weights:  551
## initial  value 7419.994364 
## iter  10 value 4453.906999
## iter  20 value 4338.163136
## iter  30 value 4257.250308
## iter  40 value 4242.054903
## iter  50 value 4241.261720
## iter  60 value 4241.069665
## iter  70 value 4152.516231
## iter  80 value 4147.486033
## iter  90 value 4145.995788
## iter 100 value 4144.094760
## final  value 4144.094760 
## stopped after 100 iterations
## # weights:  771
## initial  value 7805.544610 
## iter  10 value 4547.882462
## iter  20 value 4475.555813
## iter  30 value 4274.740854
## iter  40 value 4142.175777
## iter  50 value 4087.038583
## iter  60 value 4080.139801
## iter  70 value 4075.407442
## iter  80 value 4054.396810
## iter  90 value 4043.486550
## iter 100 value 4040.746718
## final  value 4040.746718 
## stopped after 100 iterations
## # weights:  221
## initial  value 5459.775388 
## iter  10 value 4570.550124
## iter  20 value 4570.319021
## iter  30 value 4570.307348
## iter  40 value 4445.519097
## iter  50 value 4384.377286
## iter  60 value 4206.396405
## iter  70 value 4103.547689
## iter  80 value 4081.664167
## iter  90 value 4037.118126
## iter 100 value 3991.589082
## final  value 3991.589082 
## stopped after 100 iterations
## # weights:  331
## initial  value 6199.265466 
## iter  10 value 4563.943123
## iter  20 value 4263.791689
## iter  30 value 4167.996107
## iter  40 value 4098.819500
## iter  50 value 3975.231576
## iter  60 value 3832.071892
## iter  70 value 3686.391575
## iter  80 value 3248.767406
## iter  90 value 2877.960767
## iter 100 value 2807.073224
## final  value 2807.073224 
## stopped after 100 iterations
## # weights:  551
## initial  value 5668.685641 
## iter  10 value 4537.158070
## iter  20 value 4235.837195
## iter  30 value 4194.473294
## iter  40 value 4176.497576
## iter  50 value 4166.207669
## iter  60 value 4015.595980
## iter  70 value 3974.754892
## iter  80 value 3911.206452
## iter  90 value 3673.773324
## iter 100 value 3440.583455
## final  value 3440.583455 
## stopped after 100 iterations
## # weights:  771
## initial  value 5456.883949 
## iter  10 value 4535.423263
## iter  20 value 4242.428661
## iter  30 value 4216.193519
## iter  40 value 4190.592443
## iter  50 value 4106.558024
## iter  60 value 3826.840487
## iter  70 value 3507.915255
## iter  80 value 3180.285582
## iter  90 value 2834.396079
## iter 100 value 2784.500157
## final  value 2784.500157 
## stopped after 100 iterations
## # weights:  221
## initial  value 7820.408026 
## iter  10 value 6832.288398
## iter  20 value 6377.739534
## iter  30 value 6300.272687
## iter  40 value 6249.591558
## iter  50 value 6226.277227
## iter  60 value 6218.013569
## iter  70 value 6181.849265
## iter  80 value 5978.891629
## iter  90 value 5129.792405
## iter 100 value 4697.767364
## final  value 4697.767364 
## stopped after 100 iterations
# Print the caret tuning summary: resampled Accuracy and Kappa for each
# size/decay combination, followed by the selected final values.
Adult_TDA_KDE_5.50.5_n2_NN1Fit0
## Neural Network 
## 
## 11634 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7755, 7756, 7757 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.7623282  0.2228695
##   2     0.5    0.8059961  0.4734469
##   2     0.7    0.7975765  0.3878566
##   3     0.3    0.7688631  0.2673628
##   3     0.5    0.8058269  0.4886870
##   3     0.7    0.7918216  0.3770158
##   5     0.3    0.7922412  0.3856859
##   5     0.5    0.7647408  0.2833271
##   5     0.7    0.7640527  0.2272761
##   7     0.3    0.7896629  0.3666727
##   7     0.5    0.7669757  0.2608337
##   7     0.7    0.8005848  0.5223055
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.5.
# Per-fold Accuracy/Kappa stored on the fitted train object.
Adult_TDA_KDE_5.50.5_n2_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.7717307 0.2794060    Fold3
## 2 0.8336772 0.5864984    Fold2
## 3 0.8125806 0.5544361    Fold1
# Keep the per-fold accuracies as a one-column data frame. The column is
# selected by name rather than by position so the extraction does not depend
# on the column order of `resample`.
ad_tda_kde_5.50.5_n2_nn1_fit_re <-
  Adult_TDA_KDE_5.50.5_n2_NN1Fit0$resample["Accuracy"]

# Print the architecture and weights of the final network.
summary(Adult_TDA_KDE_5.50.5_n2_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting  decay=0.5
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.13     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.03     0.00     0.01     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     3.27    -0.13     1.10    -0.52    -0.41     0.00    -0.51    -0.65 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     2.53     1.79    -0.05     0.00     0.00    -0.02    -0.56     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     2.02     2.27    -4.56     0.00     4.74 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -5.36     0.00     0.00     4.75     0.36     1.57    -0.13    -4.41 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.81     2.57     2.57     0.30     1.09    -0.54     0.02     0.40 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##    -3.02     4.04     0.93     1.76     4.11     0.06    -2.22    -3.48 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##    -1.11     0.15     1.09    -1.11     2.01     2.13     2.55     2.81 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -5.12     1.99    -0.76     0.40     1.03     0.61     2.78     0.50 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.25    -0.23    -2.47     0.93     0.73 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##    -0.47     0.13     0.41    -1.01    -0.65    -0.16    -1.26     1.92 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##    -0.40     0.44     0.00     0.07     0.07     0.67    -0.20     0.15 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##    -0.10    -0.11     0.54     0.50    -0.11    -0.01     0.13    -0.17 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.07    -0.90     0.03     0.27     0.97     0.21     1.14     0.40 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.29     0.51    -0.37     0.95     0.11 
##  b->o h1->o h2->o 
##  0.89 -0.06 -3.92
# Variable-importance plot for the node-2 TDA-assisted network, limited to the
# 50 top-ranked features; the ggplot title labels the model being shown.
vip(Adult_TDA_KDE_5.50.5_n2_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n2_NN1Fit TDA-Assisted NN")

# Score the held-out test frame with the network fitted on the training data
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n2_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predicted classes against the observed labels of the test
# frame to assess out-of-sample performance, then print the result
ad_tda_kde_5.50.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n2_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6006   569
##      >50K    1410  1783
##                                           
##                Accuracy : 0.7974          
##                  95% CI : (0.7893, 0.8053)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5062          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.8099          
##             Specificity : 0.7581          
##          Pos Pred Value : 0.9135          
##          Neg Pred Value : 0.5584          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6149          
##    Detection Prevalence : 0.6731          
##       Balanced Accuracy : 0.7840          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): this repeats the print issued right after the confusion matrix
# was built; the object is not modified in between, so the output is identical.
ad_tda_kde_5.50.5_n2_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6006   569
##      >50K    1410  1783
##                                           
##                Accuracy : 0.7974          
##                  95% CI : (0.7893, 0.8053)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5062          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.8099          
##             Specificity : 0.7581          
##          Pos Pred Value : 0.9135          
##          Neg Pred Value : 0.5584          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6149          
##    Detection Prevalence : 0.6731          
##       Balanced Accuracy : 0.7840          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics of the confusion matrix (named numeric vector)
ad_tda_kde_5.50.5_n2_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.973997e-01   5.061561e-01   7.892895e-01   8.053321e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.281964e-19   1.592910e-79
# Keep the test accuracy for the later model comparison. Selecting by name
# rather than by position keeps this correct even if the layout of `overall`
# changes; the result is the same one-element named vector as `overall[1]`.
ad_tda_kde_5.50.5_n2_nn1_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n2_nn1_cf0$overall["Accuracy"]
# Per-class statistics of the confusion matrix (named numeric vector)
ad_tda_kde_5.50.5_n2_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8098706            0.7580782            0.9134601 
##       Neg Pred Value            Precision               Recall 
##            0.5584090            0.9134601            0.8098706 
##                   F1           Prevalence       Detection Rate 
##            0.8585519            0.7592138            0.6148649 
## Detection Prevalence    Balanced Accuracy 
##            0.6731163            0.7839744
# Keep precision, recall and F1. They sit at positions 5:7 of `byClass` (see
# the print above); selecting them by name makes the intent explicit and does
# not depend on that ordering.
ad_tda_kde_5.50.5_n2_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n2_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted NN vs. TDA-assisted NN classifiers

### 3-fold diff

# Row-wise accuracy gap between the baseline NN1 resamples and the node-2
# TDA-assisted NN1 resamples (positive = baseline more accurate on that row).
# NOTE(review): rows are paired by position, not by fold label — confirm that
# both resample tables list their folds in the same order.
diff_tda_kde_5.50.5_nn1_n2_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.50.5_n2_nn1_fit_re
diff_tda_kde_5.50.5_nn1_n2_3_fold
##       Accuracy
## 1  0.056908396
## 2  0.012577948
## 3 -0.005657688
## Bayesian Tests 3-fold diff

# Bayesian Sign Test on the fold-wise differences; -0.01 and 0.01 are passed
# as the lower and upper bounds of the region of practical equivalence (ROPE)

bst_tda_kde_5.50.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio of the "left" probability to the "right"
# probability reported by the sign test

bst_tda_kde_5.50.5_nn1.n2_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_nn1.n2_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.50.5_nn1.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test on the same fold-wise differences, using the same
# -0.01 / 0.01 bounds as the sign test

bsr_tda_kde_5.50.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.3928667
## 
## $winRight
## [1] 0.6071333
# Bayesian Correlated Test
# Arguments are positional: the fold-wise differences, then 0.1, then the
# -0.01 / 0.01 bounds used by the other tests in this section.
# NOTE(review): if the second argument is the cross-validation correlation rho
# (1 / number of folds in the correlated Bayesian t-test), 0.1 corresponds to
# 10-fold CV, whereas only three fold differences are compared here, which
# would call for 1/3 — confirm against the function's definition before
# relying on the probabilities printed below.

bct_tda_kde_5.50.5_nn1.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n2_3_fold
## $left
## [1] 0.1410987
## 
## $rope
## [1] 0.1846968
## 
## $right
## [1] 0.6742046
# Rope Plot: share of the fold-wise differences falling inside [-0.01, 0.01]
plot(
  rope(
    diff_tda_kde_5.50.5_nn1_n2_3_fold,
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold))
#bf_tda_kde_5.50.5_nn1.n2_3_fold

#t_test: frequentist one-sample t-test of the differences against a mean of 0
t.test(x = as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold)
## t = 1.1453, df = 2, p-value = 0.3707
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.05865643  0.10120887
## sample estimates:
##  mean of x 
## 0.02127622
### Test set diff
# Difference between a baseline test accuracy and the test accuracy of the
# node-2 TDA-assisted NN1 model.
# NOTE(review): the baseline used here is `svm_cf_ov_acc`, while the 3-fold
# comparison in this same section uses the NN1 baseline (`ad_nn1_fit_re`).
# This looks like a leftover from the SVM section — confirm whether the NN1
# test accuracy was intended.
diff_tda_kde_5.50.5_nn1.n2_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n2_nn1_cf0_ov_acc)
diff_tda_kde_5.50.5_nn1.n2_test
##   Accuracy 
## 0.03040541
## Bayesian Tests Test set diff

# Bayesian Sign Test on the test-set accuracy difference (a single value),
# with -0.01 and 0.01 passed as the ROPE bounds

bst_tda_kde_5.50.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: "left" probability divided by the "right"
# probability of the test-set sign test

bst_tda_kde_5.50.5_nn1.n2_test_odds.left <- with(
  bst_tda_kde_5.50.5_nn1.n2_test,
  probLeft / probRight
)
bst_tda_kde_5.50.5_nn1.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference, same
# -0.01 / 0.01 bounds as above

bsr_tda_kde_5.50.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1570333
## 
## $winRight
## [1] 0.8429667
# Bayesian Correlated Test
# NOTE(review): the input is a single test-set difference and the result
# printed below is NA for left, rope and right — presumably because no spread
# can be estimated from one value. The test is not informative here; consider
# dropping it or feeding it several paired differences.

bct_tda_kde_5.50.5_nn1.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nn1.n2_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n2_test))
#bf_tda_kde_5.50.5_nn1.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n2_test))

##Node3

#Neural Network 1
# Fit an nnet classifier through caret on the node-3 TDA/KDE training frame,
# predicting `adult_df1` (coerced to a factor) from all other columns.
# Resampling and the size/decay grid come from `fitControl` and `nn1Grid`,
# which are defined elsewhere in the file; the candidate with the highest
# resampled accuracy is kept.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
# NOTE(review): `Importance` is not a caret::train argument, so it is forwarded
# to the underlying nnet fit — confirm it has any effect there.
# NOTE(review): most fits in the log below stop at the 100-iteration cap
# without converging; consider passing a larger `maxit`.
Adult_TDA_KDE_5.50.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 7412.470507 
## iter  10 value 4267.230875
## iter  20 value 4205.761387
## iter  30 value 4142.553586
## iter  40 value 4115.473166
## iter  50 value 4091.246873
## iter  60 value 4002.668375
## iter  70 value 3948.169741
## iter  80 value 3891.061318
## iter  90 value 3646.555355
## iter 100 value 3412.246403
## final  value 3412.246403 
## stopped after 100 iterations
## # weights:  331
## initial  value 7114.635684 
## iter  10 value 4546.404903
## iter  20 value 4203.853701
## iter  30 value 4190.730409
## iter  40 value 4180.757384
## iter  50 value 4126.092904
## iter  60 value 4097.499505
## iter  70 value 4071.408349
## iter  80 value 3914.444253
## iter  90 value 3477.687500
## iter 100 value 3016.829544
## final  value 3016.829544 
## stopped after 100 iterations
## # weights:  551
## initial  value 5147.905100 
## iter  10 value 4535.783417
## iter  20 value 4282.170028
## iter  30 value 4265.334582
## iter  40 value 4261.200400
## iter  50 value 4168.501417
## iter  60 value 4124.081285
## iter  70 value 4082.732765
## iter  80 value 3324.930915
## iter  90 value 3025.548321
## iter 100 value 2907.674829
## final  value 2907.674829 
## stopped after 100 iterations
## # weights:  771
## initial  value 5065.637065 
## iter  10 value 4356.515922
## iter  20 value 4268.318364
## iter  30 value 4260.420853
## iter  40 value 4228.983064
## iter  50 value 4191.776436
## iter  60 value 4140.767683
## iter  70 value 4127.365350
## iter  80 value 4114.750277
## iter  90 value 4107.811325
## iter 100 value 4104.769314
## final  value 4104.769314 
## stopped after 100 iterations
## # weights:  221
## initial  value 7466.657174 
## iter  10 value 4424.757029
## iter  20 value 4258.191796
## iter  30 value 4174.491671
## iter  40 value 4157.310259
## iter  50 value 4144.276201
## iter  60 value 4125.264156
## iter  70 value 4113.572516
## iter  80 value 4049.925242
## iter  90 value 3965.183168
## iter 100 value 3891.837349
## final  value 3891.837349 
## stopped after 100 iterations
## # weights:  331
## initial  value 5022.553667 
## iter  10 value 4572.802469
## iter  20 value 4568.585010
## iter  30 value 4568.536202
## iter  40 value 4313.452083
## iter  50 value 4269.612508
## iter  60 value 4247.396159
## iter  70 value 4184.131636
## iter  80 value 4080.786748
## iter  90 value 4040.272948
## iter 100 value 3622.073090
## final  value 3622.073090 
## stopped after 100 iterations
## # weights:  551
## initial  value 4593.815381 
## iter  10 value 4381.213133
## iter  20 value 4333.901083
## iter  30 value 4264.631854
## iter  40 value 4244.398424
## iter  50 value 4198.245838
## iter  60 value 4123.280995
## iter  70 value 3652.090322
## iter  80 value 3115.333235
## iter  90 value 3010.798703
## iter 100 value 2978.120606
## final  value 2978.120606 
## stopped after 100 iterations
## # weights:  771
## initial  value 6120.307493 
## iter  10 value 4357.386734
## iter  20 value 4093.057995
## iter  30 value 3631.593605
## iter  40 value 3348.014081
## iter  50 value 3019.158841
## iter  60 value 2985.297972
## iter  70 value 2880.917919
## iter  80 value 2815.336954
## iter  90 value 2797.166074
## iter 100 value 2759.344480
## final  value 2759.344480 
## stopped after 100 iterations
## # weights:  221
## initial  value 6366.721592 
## iter  10 value 4570.578400
## iter  20 value 4568.727448
## iter  30 value 4568.705860
## iter  40 value 4567.882839
## iter  50 value 4280.551403
## iter  60 value 4263.690530
## iter  70 value 4245.982500
## iter  80 value 4218.781979
## iter  90 value 4183.613514
## iter 100 value 4172.457104
## final  value 4172.457104 
## stopped after 100 iterations
## # weights:  331
## initial  value 4647.251441 
## iter  10 value 4522.805021
## iter  20 value 4284.729106
## iter  30 value 4271.327422
## iter  40 value 4171.539247
## iter  50 value 4149.305180
## iter  60 value 4098.030030
## iter  70 value 3987.134813
## iter  80 value 3392.914200
## iter  90 value 3021.720380
## iter 100 value 2842.820202
## final  value 2842.820202 
## stopped after 100 iterations
## # weights:  551
## initial  value 6961.113765 
## iter  10 value 4557.862421
## iter  20 value 4321.559772
## iter  30 value 4256.577803
## iter  40 value 4250.227187
## iter  50 value 4203.143371
## iter  60 value 4190.127478
## iter  70 value 4156.395240
## iter  80 value 4141.806587
## iter  90 value 4115.675439
## iter 100 value 4078.390715
## final  value 4078.390715 
## stopped after 100 iterations
## # weights:  771
## initial  value 7992.712593 
## iter  10 value 4309.895954
## iter  20 value 4301.190930
## iter  30 value 4192.483397
## iter  40 value 4128.915952
## iter  50 value 3981.125111
## iter  60 value 3892.523630
## iter  70 value 3590.207267
## iter  80 value 3097.804414
## iter  90 value 2841.920229
## iter 100 value 2744.196107
## final  value 2744.196107 
## stopped after 100 iterations
## # weights:  221
## initial  value 4640.114070 
## final  value 4569.807399 
## converged
## # weights:  331
## initial  value 5069.341441 
## iter  10 value 4487.865259
## iter  20 value 4332.418787
## iter  30 value 4239.303014
## iter  40 value 4211.245525
## iter  50 value 4140.408790
## iter  60 value 4045.406700
## iter  70 value 3658.604491
## iter  80 value 3288.338797
## iter  90 value 3170.917240
## iter 100 value 2997.583241
## final  value 2997.583241 
## stopped after 100 iterations
## # weights:  551
## initial  value 5183.847153 
## iter  10 value 4233.939556
## iter  20 value 4192.271963
## iter  30 value 4187.700803
## iter  40 value 4175.733413
## iter  50 value 4106.817151
## iter  60 value 4087.720895
## iter  70 value 4052.588389
## iter  80 value 4048.488209
## iter  90 value 4034.973240
## iter 100 value 4030.309109
## final  value 4030.309109 
## stopped after 100 iterations
## # weights:  771
## initial  value 4665.578155 
## iter  10 value 4307.056700
## iter  20 value 4159.620362
## iter  30 value 4086.288189
## iter  40 value 4038.272742
## iter  50 value 3885.284368
## iter  60 value 3764.979099
## iter  70 value 3100.628094
## iter  80 value 2875.881165
## iter  90 value 2846.229541
## iter 100 value 2812.390481
## final  value 2812.390481 
## stopped after 100 iterations
## # weights:  221
## initial  value 4778.442221 
## iter  10 value 4570.089598
## iter  20 value 4569.933304
## iter  30 value 4555.907574
## iter  40 value 4216.092346
## iter  50 value 4194.083869
## iter  60 value 4191.840284
## iter  70 value 4118.863664
## iter  80 value 4104.237333
## iter  90 value 4064.801352
## iter 100 value 4057.071497
## final  value 4057.071497 
## stopped after 100 iterations
## # weights:  331
## initial  value 5509.980494 
## iter  10 value 4559.306481
## iter  20 value 4337.723878
## iter  30 value 4254.081828
## iter  40 value 4242.802331
## iter  50 value 4241.562972
## iter  60 value 4202.684285
## iter  70 value 4144.043383
## iter  80 value 4102.764457
## iter  90 value 4099.704042
## iter 100 value 4096.030821
## final  value 4096.030821 
## stopped after 100 iterations
## # weights:  551
## initial  value 4635.285825 
## iter  10 value 4562.714838
## iter  20 value 4380.020330
## iter  30 value 4190.040060
## iter  40 value 4173.987113
## iter  50 value 4166.483132
## iter  60 value 4148.395644
## iter  70 value 4106.657037
## iter  80 value 4101.331707
## iter  90 value 4097.576576
## iter 100 value 4090.784203
## final  value 4090.784203 
## stopped after 100 iterations
## # weights:  771
## initial  value 5078.066171 
## iter  10 value 4574.170414
## iter  20 value 4569.812619
## iter  30 value 4569.761524
## iter  40 value 4381.725728
## iter  50 value 4283.808273
## iter  60 value 4259.922901
## iter  70 value 4192.990334
## iter  80 value 4166.678791
## iter  90 value 4137.736091
## iter 100 value 4099.902179
## final  value 4099.902179 
## stopped after 100 iterations
## # weights:  221
## initial  value 6009.391852 
## iter  10 value 4571.607869
## iter  20 value 4570.011612
## iter  30 value 4569.993077
## iter  40 value 4527.317924
## iter  50 value 4525.762456
## iter  60 value 4489.082042
## iter  70 value 4203.365672
## iter  80 value 3991.638186
## iter  90 value 3681.202435
## iter 100 value 3409.735181
## final  value 3409.735181 
## stopped after 100 iterations
## # weights:  331
## initial  value 4613.649812 
## iter  10 value 4386.062000
## iter  20 value 4282.093479
## iter  30 value 4270.164528
## iter  40 value 4194.055903
## iter  50 value 4179.107129
## iter  60 value 4161.126023
## iter  70 value 4159.516876
## iter  80 value 4158.373158
## iter  90 value 4145.523208
## iter 100 value 4136.196285
## final  value 4136.196285 
## stopped after 100 iterations
## # weights:  551
## initial  value 6295.985916 
## iter  10 value 4440.185437
## iter  20 value 4267.261246
## iter  30 value 4197.364666
## iter  40 value 4141.885198
## iter  50 value 4094.081867
## iter  60 value 4083.495447
## iter  70 value 4077.722491
## iter  80 value 4070.236389
## iter  90 value 4061.222619
## iter 100 value 3534.441884
## final  value 3534.441884 
## stopped after 100 iterations
## # weights:  771
## initial  value 4667.656022 
## iter  10 value 4565.486794
## iter  20 value 4290.575974
## iter  30 value 4237.354483
## iter  40 value 4227.845563
## iter  50 value 4212.142660
## iter  60 value 4141.467539
## iter  70 value 3872.824228
## iter  80 value 3498.180300
## iter  90 value 3064.501726
## iter 100 value 2856.495736
## final  value 2856.495736 
## stopped after 100 iterations
## # weights:  221
## initial  value 4698.607756 
## iter  10 value 4406.308576
## iter  20 value 4211.841986
## iter  30 value 4211.655013
## iter  40 value 4192.403154
## iter  50 value 4188.511215
## iter  60 value 4185.243101
## iter  70 value 4178.461265
## iter  80 value 4175.060855
## iter  90 value 4155.171921
## iter 100 value 4138.429966
## final  value 4138.429966 
## stopped after 100 iterations
## # weights:  331
## initial  value 8014.560724 
## iter  10 value 4546.707616
## iter  20 value 4253.374461
## iter  30 value 4231.285785
## iter  40 value 3931.961484
## iter  50 value 3362.379177
## iter  60 value 3234.038990
## iter  70 value 3229.882617
## iter  80 value 3214.579244
## iter  90 value 2965.979276
## iter 100 value 2732.087678
## final  value 2732.087678 
## stopped after 100 iterations
## # weights:  551
## initial  value 4953.359401 
## iter  10 value 4538.647976
## iter  20 value 4294.711882
## iter  30 value 4242.406012
## iter  40 value 4225.773767
## iter  50 value 4135.443973
## iter  60 value 4074.293299
## iter  70 value 3669.157337
## iter  80 value 3395.093676
## iter  90 value 2993.992553
## iter 100 value 2833.594259
## final  value 2833.594259 
## stopped after 100 iterations
## # weights:  771
## initial  value 5646.371007 
## iter  10 value 4429.478202
## iter  20 value 4178.722222
## iter  30 value 4166.817466
## iter  40 value 4158.006758
## iter  50 value 4118.056968
## iter  60 value 4104.648116
## iter  70 value 4101.878868
## iter  80 value 4092.088413
## iter  90 value 4062.985441
## iter 100 value 4042.031117
## final  value 4042.031117 
## stopped after 100 iterations
## # weights:  221
## initial  value 4589.182277 
## iter  10 value 4570.299971
## iter  20 value 4569.819471
## final  value 4569.813929 
## converged
## # weights:  331
## initial  value 6790.830581 
## iter  10 value 4570.139023
## iter  20 value 4543.826172
## iter  30 value 4529.375436
## iter  40 value 4420.989020
## iter  50 value 4310.302905
## iter  60 value 4182.481629
## iter  70 value 4174.547304
## iter  80 value 4168.419220
## iter  90 value 4166.519044
## iter 100 value 4135.507660
## final  value 4135.507660 
## stopped after 100 iterations
## # weights:  551
## initial  value 4893.147502 
## iter  10 value 4292.326286
## iter  20 value 4232.171514
## iter  30 value 4197.910077
## iter  40 value 4055.491823
## iter  50 value 3619.391896
## iter  60 value 3379.569464
## iter  70 value 3319.076125
## iter  80 value 3303.709731
## iter  90 value 3299.973606
## iter 100 value 3240.567407
## final  value 3240.567407 
## stopped after 100 iterations
## # weights:  771
## initial  value 6269.268926 
## iter  10 value 4272.709599
## iter  20 value 4252.351864
## iter  30 value 4251.427537
## iter  40 value 4249.881747
## iter  50 value 4249.183112
## iter  60 value 4247.932007
## iter  70 value 4242.658866
## iter  80 value 4236.083840
## iter  90 value 4214.945598
## iter 100 value 4118.956732
## final  value 4118.956732 
## stopped after 100 iterations
## # weights:  221
## initial  value 6909.261443 
## iter  10 value 4485.028669
## iter  20 value 4213.513736
## iter  30 value 4195.938086
## iter  40 value 4181.768373
## iter  50 value 4171.487443
## iter  60 value 3972.668461
## iter  70 value 3919.144225
## iter  80 value 3758.446739
## iter  90 value 3509.861640
## iter 100 value 3354.980678
## final  value 3354.980678 
## stopped after 100 iterations
## # weights:  331
## initial  value 5625.366223 
## iter  10 value 4417.189585
## iter  20 value 4378.160473
## iter  30 value 4376.755342
## iter  40 value 4273.072173
## iter  50 value 4221.627318
## iter  60 value 4220.426157
## iter  70 value 4218.264869
## iter  80 value 4171.719218
## iter  90 value 4158.367043
## iter 100 value 4157.446969
## final  value 4157.446969 
## stopped after 100 iterations
## # weights:  551
## initial  value 5401.119581 
## iter  10 value 4570.105124
## iter  20 value 4339.874070
## iter  30 value 4271.528673
## iter  40 value 4204.559479
## iter  50 value 4193.714347
## iter  60 value 4187.933163
## iter  70 value 4172.991816
## iter  80 value 4156.185556
## iter  90 value 4154.172101
## iter 100 value 4150.877174
## final  value 4150.877174 
## stopped after 100 iterations
## # weights:  771
## initial  value 5942.003919 
## iter  10 value 4559.908531
## iter  20 value 4361.311167
## iter  30 value 4186.517182
## iter  40 value 4169.880477
## iter  50 value 4156.617883
## iter  60 value 4151.325613
## iter  70 value 4100.350445
## iter  80 value 4090.425728
## iter  90 value 4077.936707
## iter 100 value 3700.047178
## final  value 3700.047178 
## stopped after 100 iterations
## # weights:  771
## initial  value 7578.739209 
## iter  10 value 6811.245614
## iter  20 value 6507.229818
## iter  30 value 6368.908979
## iter  40 value 6339.983348
## iter  50 value 6336.392618
## iter  60 value 6309.494602
## iter  70 value 6295.369777
## iter  80 value 6245.754579
## iter  90 value 6242.493054
## iter 100 value 6233.395070
## final  value 6233.395070 
## stopped after 100 iterations
# Print the fitted caret object: resampled accuracy/kappa for every size/decay
# combination and the combination that was selected
Adult_TDA_KDE_5.50.5_n3_NN1Fit0
## Neural Network 
## 
## 11634 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7755, 7756, 7757 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.7575204  0.2493056
##   2     0.5    0.7529617  0.1983997
##   2     0.7    0.7965491  0.4140765
##   3     0.3    0.8041987  0.4732673
##   3     0.5    0.7674922  0.3000316
##   3     0.7    0.7883731  0.3758278
##   5     0.3    0.8032500  0.4597113
##   5     0.5    0.7974020  0.4547382
##   5     0.7    0.7774628  0.3481146
##   7     0.3    0.7876913  0.3786681
##   7     0.5    0.7874285  0.3680633
##   7     0.7    0.8111514  0.4646650
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.7.
# Per-fold accuracy and kappa of the selected node-3 model
Adult_TDA_KDE_5.50.5_n3_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8323878 0.5609659    Fold2
## 2 0.8329466 0.5769633    Fold1
## 3 0.7681197 0.2560658    Fold3
# Keep the fold accuracies as a one-column data frame for the later paired
# comparison. The column is selected by name instead of by position; the
# result is identical to `$resample[1]`.
# NOTE(review): the fold labels are dropped here and the rows above are not in
# fold order (Fold2, Fold1, Fold3), so any later row-wise pairing with another
# model's resamples relies on both tables sharing this order — confirm.
ad_tda_kde_5.50.5_n3_nn1_fit_re <-
  Adult_TDA_KDE_5.50.5_n3_NN1Fit0$resample["Accuracy"]

# Print the structure and weights of the final network
summary(Adult_TDA_KDE_5.50.5_n3_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting  decay=0.7
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00    -0.02     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00    -0.02     0.00     0.00     0.01 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00    -0.01     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.01    -0.01     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00    -0.01     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.04     0.00    -0.07     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.40     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.01     0.00    -0.01 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.01     0.00     0.00     0.00     0.06    -0.01     0.00     0.04 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00    -0.03     0.00     0.00     0.00    -0.01     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.01     0.00     0.00     0.00    -0.01     0.00     0.01     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.03    -0.02     0.00    -0.01    -0.01 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.01     0.00     0.00    -0.01     0.00     0.00    -0.02     0.02 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.22     1.82     0.33     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.07     0.00     0.00     0.00     0.00     0.01     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##    -0.01     0.00     0.00    -0.01     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.01     0.00    -0.01 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.01     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.01     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00    -0.01     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.02     0.97    -0.15     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##     0.00     0.00     0.00     0.22     0.00     0.00     0.00     0.00 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.03     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.02     1.04     0.03     0.00     0.00     0.00     0.14    -0.11 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##    -0.04     0.01     0.00     0.01     0.00     0.00     0.00     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.00     0.00     0.00    -0.10     0.21     0.00    -0.05 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##    -0.14     0.00     0.00     0.09     0.22     0.29     0.00    -0.12 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.00    -0.16     0.00     0.00     0.03     0.19     0.00    -0.08 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##    -0.09     0.00    -0.09     0.01     0.00     0.00    -0.03     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##     0.09     0.00     0.00    -0.14     0.14     0.00     0.00     0.00 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.02     0.00    -0.08     0.00     0.00     0.10     0.16    -0.14 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##    -0.43     0.00     0.72     0.02     0.00     0.00     0.00     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##     0.00     0.00     0.00     0.00     0.00    -0.11     0.00     0.00 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00     0.11     0.00     0.00 
##    b->h6   i1->h6   i2->h6   i3->h6   i4->h6   i5->h6   i6->h6   i7->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h6   i9->h6  i10->h6  i11->h6  i12->h6  i13->h6  i14->h6  i15->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h6  i17->h6  i18->h6  i19->h6  i20->h6  i21->h6  i22->h6  i23->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h6  i25->h6  i26->h6  i27->h6  i28->h6  i29->h6  i30->h6  i31->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h6  i33->h6  i34->h6  i35->h6  i36->h6  i37->h6  i38->h6  i39->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h6  i41->h6  i42->h6  i43->h6  i44->h6  i45->h6  i46->h6  i47->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h6  i49->h6  i50->h6  i51->h6  i52->h6  i53->h6  i54->h6  i55->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h6  i57->h6  i58->h6  i59->h6  i60->h6  i61->h6  i62->h6  i63->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h6  i65->h6  i66->h6  i67->h6  i68->h6  i69->h6  i70->h6  i71->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h6  i73->h6  i74->h6  i75->h6  i76->h6  i77->h6  i78->h6  i79->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h6  i81->h6  i82->h6  i83->h6  i84->h6  i85->h6  i86->h6  i87->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h6  i89->h6  i90->h6  i91->h6  i92->h6  i93->h6  i94->h6  i95->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h6  i97->h6  i98->h6  i99->h6 i100->h6 i101->h6 i102->h6 i103->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h7   i1->h7   i2->h7   i3->h7   i4->h7   i5->h7   i6->h7   i7->h7 
##    -0.02    -0.30     0.00     0.00     0.00     0.00    -0.02     0.00 
##   i8->h7   i9->h7  i10->h7  i11->h7  i12->h7  i13->h7  i14->h7  i15->h7 
##     0.00     0.00     0.00    -0.06     0.00     0.00     0.00     0.00 
##  i16->h7  i17->h7  i18->h7  i19->h7  i20->h7  i21->h7  i22->h7  i23->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h7  i25->h7  i26->h7  i27->h7  i28->h7  i29->h7  i30->h7  i31->h7 
##     0.00     0.00     0.00    -0.02    -0.19     0.00     0.00     0.02 
##  i32->h7  i33->h7  i34->h7  i35->h7  i36->h7  i37->h7  i38->h7  i39->h7 
##     0.00    -0.04     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h7  i41->h7  i42->h7  i43->h7  i44->h7  i45->h7  i46->h7  i47->h7 
##     0.01     0.00     0.00     0.00     0.00     0.00    -0.04     0.00 
##  i48->h7  i49->h7  i50->h7  i51->h7  i52->h7  i53->h7  i54->h7  i55->h7 
##     0.01     0.00     0.00     0.02    -0.04     0.00     0.00     0.00 
##  i56->h7  i57->h7  i58->h7  i59->h7  i60->h7  i61->h7  i62->h7  i63->h7 
##     0.00     0.00     0.00     0.00     0.00    -0.02    -0.04     0.02 
##  i64->h7  i65->h7  i66->h7  i67->h7  i68->h7  i69->h7  i70->h7  i71->h7 
##     0.35     0.00     0.17     0.01     0.00     0.00     0.00     0.00 
##  i72->h7  i73->h7  i74->h7  i75->h7  i76->h7  i77->h7  i78->h7  i79->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h7  i81->h7  i82->h7  i83->h7  i84->h7  i85->h7  i86->h7  i87->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h7  i89->h7  i90->h7  i91->h7  i92->h7  i93->h7  i94->h7  i95->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h7  i97->h7  i98->h7  i99->h7 i100->h7 i101->h7 i102->h7 i103->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7 
##     0.00     0.00    -0.03     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o 
##  0.41  0.44  0.55  1.13  0.11 -2.22 -0.02  0.51
# Variable-importance plot for the node-3 TDA-assisted neural network,
# limited to 50 importance scores. Title typo "Assited" corrected.
vip(Adult_TDA_KDE_5.50.5_n3_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n3_NN1Fit TDA-Assisted NN")

# Score the held-out test rows with the node-3 TDA-assisted network
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n3_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predictions against the observed test labels and print
# the resulting confusion-matrix summary
ad_tda_kde_5.50.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n3_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7168  1717
##      >50K     248   635
##                                           
##                Accuracy : 0.7988          
##                  95% CI : (0.7907, 0.8067)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3007          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9666          
##             Specificity : 0.2700          
##          Pos Pred Value : 0.8068          
##          Neg Pred Value : 0.7191          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7338          
##    Detection Prevalence : 0.9096          
##       Balanced Accuracy : 0.6183          
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the same confusion-matrix object again (repeats the output above)
ad_tda_kde_5.50.5_n3_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7168  1717
##      >50K     248   635
##                                           
##                Accuracy : 0.7988          
##                  95% CI : (0.7907, 0.8067)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3007          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9666          
##             Specificity : 0.2700          
##          Pos Pred Value : 0.8068          
##          Neg Pred Value : 0.7191          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7338          
##    Detection Prevalence : 0.9096          
##       Balanced Accuracy : 0.6183          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the confusion matrix (accuracy, kappa, accuracy
# bounds and p-values)
ad_tda_kde_5.50.5_n3_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.988329e-01   3.006549e-01   7.907434e-01   8.067438e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   5.234587e-21  1.718627e-240
# Keep the test-set accuracy (the first entry of $overall) for later comparisons
ad_tda_kde_5.50.5_n3_nn1_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n3_nn1_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...)
ad_tda_kde_5.50.5_n3_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9665588            0.2699830            0.8067530 
##       Neg Pred Value            Precision               Recall 
##            0.7191393            0.8067530            0.9665588 
##                   F1           Prevalence       Detection Rate 
##            0.8794552            0.7592138            0.7338247 
## Detection Prevalence    Balanced Accuracy 
##            0.9096028            0.6182709
# Keep precision, recall and F1 (entries 5 to 7 of $byClass)
ad_tda_kde_5.50.5_n3_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n3_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-TDA-assisted NN vs. TDA-assisted NN classifiers

### 3-fold diff

# Fold-wise accuracy difference: ad_nn1_fit_re minus the node-3
# TDA-assisted NN fit
diff_tda_kde_5.50.5_nn1_n3_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.50.5_n3_nn1_fit_re
diff_tda_kde_5.50.5_nn1_n3_3_fold
##       Accuracy
## 1 -0.003748713
## 2  0.013308465
## 3  0.038803194
## Bayesian Tests 3-fold diff

# Bayesian sign test on the fold-wise differences, rope limits -0.01 / 0.01

bst_tda_kde_5.50.5_nn1.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test (probLeft / probRight)

bst_tda_kde_5.50.5_nn1.n3_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_nn1.n3_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.50.5_nn1.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the fold-wise differences, rope limits
# -0.01 / 0.01

bsr_tda_kde_5.50.5_nn1.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.3938333
## 
## $winRight
## [1] 0.6061667
# Bayesian correlated t-test on the fold-wise differences, rope limits
# -0.01 / 0.01 (the 0.1 argument is presumably the correlation term; confirm
# against the function definition)

bct_tda_kde_5.50.5_nn1.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n3_3_fold
## $left
## [1] 0.1043999
## 
## $rope
## [1] 0.2505368
## 
## $right
## [1] 0.6450633
# ROPE plot of the fold-wise differences over the range [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.50.5_nn1_n3_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold))
#bf_tda_kde_5.50.5_nn1.n3_3_fold

# One-sample t-test of the fold-wise differences against a mean of zero
t.test(x = as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold)
## t = 1.3039, df = 2, p-value = 0.3222
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.03707663  0.06931860
## sample estimates:
##  mean of x 
## 0.01612098
### Test set diff
# Difference in test-set accuracy: svm_cf_ov_acc minus the node-3
# TDA-assisted NN accuracy.
# NOTE(review): the 3-fold comparison above uses the NN baseline
# (ad_nn1_fit_re), but the minuend here is svm_cf_ov_acc. Presumably the
# non-TDA NN test accuracy was intended; confirm which baseline object
# should be used before relying on the results below.
diff_tda_kde_5.50.5_nn1.n3_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n3_nn1_cf0_ov_acc)
diff_tda_kde_5.50.5_nn1.n3_test
##   Accuracy 
## 0.02897215
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set difference, rope limits -0.01 / 0.01

bst_tda_kde_5.50.5_nn1.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test (probLeft / probRight)

bst_tda_kde_5.50.5_nn1.n3_test_odds.left <- with(
  bst_tda_kde_5.50.5_nn1.n3_test,
  probLeft / probRight
)
bst_tda_kde_5.50.5_nn1.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference, rope limits
# -0.01 / 0.01

bsr_tda_kde_5.50.5_nn1.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1585
## 
## $winRight
## [1] 0.8415
# Bayesian Correlated Test
# NOTE(review): the input is a single test-set difference and the printed
# result is NA for left/rope/right. Presumably the test needs more than one
# observation to estimate a variance; confirm whether this call is meaningful.

bct_tda_kde_5.50.5_nn1.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1.n3_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nn1.n3_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n3_test))
#bf_tda_kde_5.50.5_nn1.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n3_test))

##Node4

# Neural Network 1: tune an nnet classifier on the node-4 TDA subset, using
# the shared resampling control (fitControl) and tuning grid (nn1Grid), and
# select the model by accuracy.
# NOTE(review): `Importance` does not appear to be a named train() argument,
# so it is forwarded through `...`; confirm it has any effect for
# method = "nnet".
Adult_TDA_KDE_5.50.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n4.vec,
  Importance = TRUE, # spelled out: `T` is an ordinary, reassignable variable
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 5739.281512 
## iter  10 value 3432.601305
## iter  20 value 3432.590216
## iter  30 value 3432.515145
## final  value 3432.513480 
## converged
## # weights:  331
## initial  value 3450.518076 
## iter  10 value 3341.946849
## iter  20 value 3226.640711
## iter  30 value 3194.340060
## iter  40 value 3165.005668
## iter  50 value 3151.477553
## iter  60 value 3143.784828
## iter  70 value 3126.374783
## iter  80 value 3084.365997
## iter  90 value 2675.818819
## iter 100 value 2260.386954
## final  value 2260.386954 
## stopped after 100 iterations
## # weights:  551
## initial  value 3481.334792 
## iter  10 value 3363.979301
## iter  20 value 3243.896940
## iter  30 value 3220.757619
## iter  40 value 3209.696549
## iter  50 value 3206.312986
## iter  60 value 3185.351161
## iter  70 value 3098.510879
## iter  80 value 2937.633414
## iter  90 value 2510.093019
## iter 100 value 2268.828289
## final  value 2268.828289 
## stopped after 100 iterations
## # weights:  771
## initial  value 5133.083721 
## iter  10 value 3412.979213
## iter  20 value 3213.986857
## iter  30 value 3117.205876
## iter  40 value 3096.386761
## iter  50 value 3015.328138
## iter  60 value 2894.762950
## iter  70 value 2534.378123
## iter  80 value 2181.018128
## iter  90 value 2072.737869
## iter 100 value 2064.979188
## final  value 2064.979188 
## stopped after 100 iterations
## # weights:  221
## initial  value 5725.869290 
## iter  10 value 3433.345928
## iter  20 value 3432.785068
## iter  30 value 3432.778596
## iter  30 value 3432.778569
## iter  30 value 3432.778563
## final  value 3432.778563 
## converged
## # weights:  331
## initial  value 4586.983193 
## iter  10 value 3386.663792
## iter  20 value 3204.888886
## iter  30 value 3191.651712
## iter  40 value 3190.459578
## iter  50 value 3188.602156
## iter  60 value 3187.215612
## iter  70 value 3187.027871
## final  value 3187.025599 
## converged
## # weights:  551
## initial  value 5901.508141 
## iter  10 value 3289.670713
## iter  20 value 3287.986466
## iter  30 value 3287.776842
## iter  40 value 3287.441148
## iter  50 value 3246.801746
## iter  60 value 3214.778119
## iter  70 value 3163.434840
## iter  80 value 3157.547776
## iter  90 value 3156.575501
## iter 100 value 3155.413311
## final  value 3155.413311 
## stopped after 100 iterations
## # weights:  771
## initial  value 3799.109582 
## iter  10 value 3222.883339
## iter  20 value 3196.882939
## iter  30 value 3182.818440
## iter  40 value 3172.497276
## iter  50 value 3152.453050
## iter  60 value 3147.608524
## iter  70 value 3144.441809
## iter  80 value 3093.903896
## iter  90 value 3000.990871
## iter 100 value 2814.517422
## final  value 2814.517422 
## stopped after 100 iterations
## # weights:  221
## initial  value 6470.451530 
## iter  10 value 3433.261482
## iter  20 value 3433.242319
## iter  30 value 3422.603661
## iter  40 value 3224.507871
## iter  50 value 3221.698690
## iter  60 value 3210.318176
## iter  70 value 3181.523810
## iter  80 value 3030.962010
## iter  90 value 2901.840716
## iter 100 value 2850.965347
## final  value 2850.965347 
## stopped after 100 iterations
## # weights:  331
## initial  value 5649.842980 
## iter  10 value 3416.712045
## iter  20 value 3196.424761
## iter  30 value 3187.145246
## iter  40 value 3168.144759
## iter  50 value 2907.364285
## iter  60 value 2714.574292
## iter  70 value 2570.658358
## iter  80 value 2505.827709
## iter  90 value 2462.142673
## iter 100 value 2421.461276
## final  value 2421.461276 
## stopped after 100 iterations
## # weights:  551
## initial  value 4052.239231 
## iter  10 value 3415.406693
## iter  20 value 3346.395549
## iter  30 value 3283.281501
## iter  40 value 2965.985151
## iter  50 value 2845.465094
## iter  60 value 2802.139758
## iter  70 value 2483.521063
## iter  80 value 2143.156437
## iter  90 value 2089.615078
## iter 100 value 2085.077917
## final  value 2085.077917 
## stopped after 100 iterations
## # weights:  771
## initial  value 5918.571805 
## iter  10 value 3459.592492
## iter  20 value 3398.022557
## iter  30 value 3204.875050
## iter  40 value 3194.308070
## iter  50 value 3188.657602
## iter  60 value 3166.729518
## iter  70 value 3157.614683
## iter  80 value 3123.678196
## iter  90 value 3110.377445
## iter 100 value 3106.612637
## final  value 3106.612637 
## stopped after 100 iterations
## # weights:  221
## initial  value 4985.662268 
## iter  10 value 3406.587356
## iter  20 value 3254.581342
## iter  30 value 3192.982901
## iter  40 value 3187.298890
## iter  50 value 3181.065538
## iter  60 value 3130.339159
## iter  70 value 3107.168343
## iter  80 value 2994.181793
## iter  90 value 2567.637557
## iter 100 value 2410.906581
## final  value 2410.906581 
## stopped after 100 iterations
## # weights:  331
## initial  value 5251.543139 
## iter  10 value 3198.793024
## iter  20 value 3171.813244
## iter  30 value 3168.376437
## iter  40 value 3157.281212
## iter  50 value 3144.167566
## iter  60 value 3113.045247
## iter  70 value 3094.291351
## iter  80 value 3086.168782
## iter  90 value 3069.147230
## iter 100 value 3057.601310
## final  value 3057.601310 
## stopped after 100 iterations
## # weights:  551
## initial  value 4242.602357 
## iter  10 value 3406.723963
## iter  20 value 3180.206248
## iter  30 value 3144.876508
## iter  40 value 3054.163376
## iter  50 value 2836.610838
## iter  60 value 2488.014532
## iter  70 value 2178.205320
## iter  80 value 2051.968034
## iter  90 value 2005.046786
## iter 100 value 1972.229363
## final  value 1972.229363 
## stopped after 100 iterations
## # weights:  771
## initial  value 3705.296238 
## iter  10 value 3220.692749
## iter  20 value 3197.029921
## iter  30 value 3132.419701
## iter  40 value 3001.678314
## iter  50 value 2835.038653
## iter  60 value 2747.318658
## iter  70 value 2277.520531
## iter  80 value 2125.005006
## iter  90 value 2038.188064
## iter 100 value 2022.907251
## final  value 2022.907251 
## stopped after 100 iterations
## # weights:  221
## initial  value 3810.044011 
## iter  10 value 3432.093392
## iter  20 value 3311.873578
## iter  30 value 3275.821821
## iter  40 value 3275.663747
## iter  50 value 3188.974361
## iter  60 value 3167.391231
## iter  70 value 3161.844708
## iter  80 value 3161.080743
## iter  90 value 3161.030751
## iter  90 value 3161.030747
## iter  90 value 3161.030747
## final  value 3161.030747 
## converged
## # weights:  331
## initial  value 5119.457146 
## iter  10 value 3407.890927
## iter  20 value 3213.500860
## iter  30 value 3153.476266
## iter  40 value 3142.115584
## iter  50 value 3137.662985
## iter  60 value 3134.534136
## iter  70 value 3127.325580
## iter  80 value 3081.814574
## iter  90 value 2851.313826
## iter 100 value 2778.127530
## final  value 2778.127530 
## stopped after 100 iterations
## # weights:  551
## initial  value 3739.077690 
## iter  10 value 3409.234620
## iter  20 value 3184.793828
## iter  30 value 3171.101876
## iter  40 value 3169.711353
## iter  50 value 3167.760372
## iter  60 value 3165.231782
## iter  70 value 3117.790536
## iter  80 value 3107.810737
## iter  90 value 3097.632866
## iter 100 value 3095.884500
## final  value 3095.884500 
## stopped after 100 iterations
## # weights:  771
## initial  value 4130.144297 
## iter  10 value 3316.485137
## iter  20 value 3219.276039
## iter  30 value 3181.471810
## iter  40 value 3181.150760
## iter  50 value 3178.835029
## iter  60 value 3170.331172
## iter  70 value 3167.270638
## iter  80 value 3164.012849
## iter  90 value 3153.558596
## iter 100 value 3152.959356
## final  value 3152.959356 
## stopped after 100 iterations
## # weights:  221
## initial  value 6884.284514 
## iter  10 value 3432.313883
## iter  20 value 3428.717757
## iter  30 value 3277.427953
## iter  40 value 3265.472836
## iter  50 value 3200.167927
## iter  60 value 3198.024588
## iter  70 value 3159.940962
## iter  80 value 3137.810555
## iter  90 value 3109.357525
## iter 100 value 3103.465767
## final  value 3103.465767 
## stopped after 100 iterations
## # weights:  331
## initial  value 4829.081789 
## iter  10 value 3301.675316
## iter  20 value 3190.153191
## iter  30 value 3175.838379
## iter  40 value 3167.331351
## iter  50 value 3163.593063
## iter  60 value 3162.692347
## iter  70 value 3160.778609
## iter  80 value 3149.475137
## iter  90 value 3129.427416
## iter 100 value 3114.338071
## final  value 3114.338071 
## stopped after 100 iterations
## # weights:  551
## initial  value 3540.390953 
## iter  10 value 3273.591338
## iter  20 value 3178.375256
## iter  30 value 3169.700267
## iter  40 value 3162.364890
## iter  50 value 3162.228570
## iter  60 value 3161.837130
## iter  70 value 3128.478032
## iter  80 value 3100.259942
## iter  90 value 3083.159651
## iter 100 value 3061.989222
## final  value 3061.989222 
## stopped after 100 iterations
## # weights:  771
## initial  value 3584.019026 
## iter  10 value 3412.165441
## iter  20 value 3409.367353
## iter  30 value 3397.211063
## iter  40 value 3299.058350
## iter  50 value 3238.218726
## iter  60 value 3195.961334
## iter  70 value 3156.391264
## iter  80 value 3117.297208
## iter  90 value 3071.980941
## iter 100 value 2936.549123
## final  value 2936.549123 
## stopped after 100 iterations
## # weights:  221
## initial  value 4919.608577 
## iter  10 value 3422.659695
## iter  20 value 3193.599264
## iter  30 value 3101.697611
## iter  40 value 2892.221606
## iter  50 value 2522.480328
## iter  60 value 2383.253066
## iter  70 value 2284.067196
## iter  80 value 2164.057123
## iter  90 value 2040.110581
## iter 100 value 1988.460685
## final  value 1988.460685 
## stopped after 100 iterations
## # weights:  331
## initial  value 3482.789483 
## iter  10 value 3423.011587
## iter  20 value 3213.542123
## iter  30 value 3186.712387
## iter  40 value 3166.732167
## iter  50 value 3077.651261
## iter  60 value 3049.055296
## iter  70 value 2806.979761
## iter  80 value 2425.944635
## iter  90 value 2202.129618
## iter 100 value 2170.358716
## final  value 2170.358716 
## stopped after 100 iterations
## # weights:  551
## initial  value 7764.047808 
## iter  10 value 3418.037478
## iter  20 value 3296.831403
## iter  30 value 3202.206498
## iter  40 value 3198.453406
## iter  50 value 3192.991746
## iter  60 value 3184.437752
## iter  70 value 3151.225245
## iter  80 value 3059.952375
## iter  90 value 3017.872404
## iter 100 value 2908.463666
## final  value 2908.463666 
## stopped after 100 iterations
## # weights:  771
## initial  value 3533.219461 
## iter  10 value 3359.850030
## iter  20 value 3216.537091
## iter  30 value 3204.256279
## iter  40 value 3154.016035
## iter  50 value 3148.502897
## iter  60 value 3143.103330
## iter  70 value 2915.842873
## iter  80 value 2445.549042
## iter  90 value 2331.979098
## iter 100 value 2290.704761
## final  value 2290.704761 
## stopped after 100 iterations
## # weights:  221
## initial  value 5640.039318 
## iter  10 value 3432.458738
## iter  20 value 3419.936974
## iter  30 value 3230.054366
## iter  40 value 3185.659763
## iter  50 value 3125.218967
## iter  60 value 2764.078395
## iter  70 value 2551.470777
## iter  80 value 2235.565626
## iter  90 value 2152.103686
## iter 100 value 2145.637166
## final  value 2145.637166 
## stopped after 100 iterations
## # weights:  331
## initial  value 4334.585138 
## iter  10 value 3380.778017
## iter  20 value 3230.986055
## iter  30 value 3158.903320
## iter  40 value 3133.427884
## iter  50 value 3075.523430
## iter  60 value 3017.364163
## iter  70 value 2872.511040
## iter  80 value 2557.026787
## iter  90 value 2354.912691
## iter 100 value 2281.255712
## final  value 2281.255712 
## stopped after 100 iterations
## # weights:  551
## initial  value 4908.127613 
## iter  10 value 3432.850919
## iter  20 value 3431.465120
## iter  30 value 3431.448588
## iter  40 value 3391.253860
## iter  50 value 3253.958662
## iter  60 value 3234.885305
## iter  70 value 3222.927534
## iter  80 value 3206.827815
## iter  90 value 3107.860947
## iter 100 value 2727.685412
## final  value 2727.685412 
## stopped after 100 iterations
## # weights:  771
## initial  value 3908.111529 
## iter  10 value 3309.312080
## iter  20 value 3295.153160
## iter  30 value 3213.355031
## iter  40 value 3192.874131
## iter  50 value 3187.021445
## iter  60 value 3133.583477
## iter  70 value 3067.226765
## iter  80 value 3055.803126
## iter  90 value 3048.566165
## iter 100 value 3034.216239
## final  value 3034.216239 
## stopped after 100 iterations
## # weights:  221
## initial  value 4075.563457 
## iter  10 value 3432.370516
## iter  20 value 3431.634426
## iter  30 value 3431.625910
## final  value 3431.625839 
## converged
## # weights:  331
## initial  value 6386.033847 
## iter  10 value 3425.441704
## iter  20 value 3357.838206
## iter  30 value 3230.540251
## iter  40 value 3113.085735
## iter  50 value 3006.388610
## iter  60 value 2847.370343
## iter  70 value 2550.919182
## iter  80 value 2383.551243
## iter  90 value 2278.314688
## iter 100 value 2150.715107
## final  value 2150.715107 
## stopped after 100 iterations
## # weights:  551
## initial  value 3930.514926 
## iter  10 value 3433.365069
## iter  20 value 3431.645895
## iter  30 value 3431.626042
## iter  40 value 3431.596593
## iter  50 value 3226.288461
## iter  60 value 3222.547706
## iter  70 value 3187.020199
## iter  80 value 3091.488115
## iter  90 value 2765.779829
## iter 100 value 2586.354693
## final  value 2586.354693 
## stopped after 100 iterations
## # weights:  771
## initial  value 8343.579679 
## iter  10 value 3431.918462
## iter  20 value 3430.843156
## iter  30 value 3429.632044
## iter  40 value 3398.898454
## iter  50 value 3146.853020
## iter  60 value 3067.095610
## iter  70 value 2931.631860
## iter  80 value 2837.089729
## iter  90 value 2491.017877
## iter 100 value 2295.855164
## final  value 2295.855164 
## stopped after 100 iterations
## # weights:  771
## initial  value 5261.960526 
## iter  10 value 5103.882260
## iter  20 value 4830.194492
## iter  30 value 4712.332379
## iter  40 value 4706.017582
## iter  50 value 4702.962005
## iter  60 value 4693.893457
## iter  70 value 4689.557466
## iter  80 value 4677.593978
## iter  90 value 4668.091074
## iter 100 value 4662.846200
## final  value 4662.846200 
## stopped after 100 iterations
# Print the resampling results across the tuning grid for the node-4 fit
Adult_TDA_KDE_5.50.5_n4_NN1Fit0
## Neural Network 
## 
## 10038 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6692, 6692, 6692 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.8311417  0.3426364
##   2     0.5    0.8125125  0.2148859
##   2     0.7    0.8136083  0.1961233
##   3     0.3    0.8262602  0.3795918
##   3     0.5    0.8233712  0.3306511
##   3     0.7    0.8185894  0.2412877
##   5     0.3    0.8345288  0.4235930
##   5     0.5    0.8136083  0.3003756
##   5     0.7    0.8313409  0.3149499
##   7     0.3    0.8406057  0.4273529
##   7     0.5    0.8180913  0.2282293
##   7     0.7    0.8197848  0.2276777
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.3.
# Per-fold Accuracy and Kappa stored on the fitted object
Adult_TDA_KDE_5.50.5_n4_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8493724 0.5189879    Fold2
## 2 0.8547519 0.5402279    Fold1
## 3 0.8176928 0.2228429    Fold3
# Keep the per-fold accuracies (first column of $resample), presumably for
# the later fold-wise comparisons
ad_tda_kde_5.50.5_n4_nn1_fit_re <-
  Adult_TDA_KDE_5.50.5_n4_NN1Fit0$resample["Accuracy"]

# Print the architecture and weights of the final node-4 network
summary(Adult_TDA_KDE_5.50.5_n4_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting  decay=0.3
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.01     0.24     0.00     0.00     0.00     0.00     0.02     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00    -0.01     0.00     0.04 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00    -0.01     0.08     0.00     0.00     0.02 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00    -0.01     0.00     0.00     0.00    -0.01     0.00     0.05 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##    -0.01     0.00     0.00     0.00     0.00     0.00    -0.03     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.01    -0.01     0.03     0.00     0.00     0.00    -0.01 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.02    -0.01     0.02 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##    -0.06     0.00     0.29     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.06     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00    -0.05     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##    -0.03    -0.22     0.02     0.00     0.00     0.00    -0.02     0.02 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##    -0.05     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.02     0.00     0.07 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00    -0.12    -0.26    -0.01     0.00     0.02 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00    -0.04     0.00     0.00     0.02     0.03     0.00     0.06 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.03    -0.04     0.00     0.00     0.01     0.00    -0.10     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##    -0.03     0.00     0.00     0.08    -0.01     0.00    -0.06     0.02 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -0.06     0.00     0.00     0.01     0.00    -0.03    -0.12     0.09 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.08     0.59    -0.35     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00    -0.01     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00    -0.02     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.01     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.72     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.00     0.00     0.00    -0.17     0.00     0.00     0.00     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h6   i1->h6   i2->h6   i3->h6   i4->h6   i5->h6   i6->h6   i7->h6 
##     0.00     0.01     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h6   i9->h6  i10->h6  i11->h6  i12->h6  i13->h6  i14->h6  i15->h6 
##     0.00     0.00     0.00     0.01     0.00     0.00     0.00     0.00 
##  i16->h6  i17->h6  i18->h6  i19->h6  i20->h6  i21->h6  i22->h6  i23->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h6  i25->h6  i26->h6  i27->h6  i28->h6  i29->h6  i30->h6  i31->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h6  i33->h6  i34->h6  i35->h6  i36->h6  i37->h6  i38->h6  i39->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h6  i41->h6  i42->h6  i43->h6  i44->h6  i45->h6  i46->h6  i47->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h6  i49->h6  i50->h6  i51->h6  i52->h6  i53->h6  i54->h6  i55->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h6  i57->h6  i58->h6  i59->h6  i60->h6  i61->h6  i62->h6  i63->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h6  i65->h6  i66->h6  i67->h6  i68->h6  i69->h6  i70->h6  i71->h6 
##    -0.61     0.11    -0.04     0.00     0.00     0.00     0.00     0.00 
##  i72->h6  i73->h6  i74->h6  i75->h6  i76->h6  i77->h6  i78->h6  i79->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h6  i81->h6  i82->h6  i83->h6  i84->h6  i85->h6  i86->h6  i87->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h6  i89->h6  i90->h6  i91->h6  i92->h6  i93->h6  i94->h6  i95->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h6  i97->h6  i98->h6  i99->h6 i100->h6 i101->h6 i102->h6 i103->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h7   i1->h7   i2->h7   i3->h7   i4->h7   i5->h7   i6->h7   i7->h7 
##    -0.25    -0.30     0.00    -0.65     0.47     0.00     0.24     0.08 
##   i8->h7   i9->h7  i10->h7  i11->h7  i12->h7  i13->h7  i14->h7  i15->h7 
##    -0.13    -0.25     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h7  i17->h7  i18->h7  i19->h7  i20->h7  i21->h7  i22->h7  i23->h7 
##     0.00     0.00     0.00     0.00    -0.05     0.43     0.00    -0.31 
##  i24->h7  i25->h7  i26->h7  i27->h7  i28->h7  i29->h7  i30->h7  i31->h7 
##     0.00     0.00     0.00    -0.31    -0.97    -0.17     0.00     0.41 
##  i32->h7  i33->h7  i34->h7  i35->h7  i36->h7  i37->h7  i38->h7  i39->h7 
##     0.00    -0.49     0.00     0.00     0.00    -0.20     0.00    -0.05 
##  i40->h7  i41->h7  i42->h7  i43->h7  i44->h7  i45->h7  i46->h7  i47->h7 
##     0.00     0.00     0.27     0.16    -0.02     0.00    -0.21     0.00 
##  i48->h7  i49->h7  i50->h7  i51->h7  i52->h7  i53->h7  i54->h7  i55->h7 
##    -0.02    -0.12    -0.05     0.62    -0.42    -0.05    -0.13     0.00 
##  i56->h7  i57->h7  i58->h7  i59->h7  i60->h7  i61->h7  i62->h7  i63->h7 
##    -0.27     0.00    -0.21     0.29    -0.12    -0.20    -0.38     0.13 
##  i64->h7  i65->h7  i66->h7  i67->h7  i68->h7  i69->h7  i70->h7  i71->h7 
##     0.01     0.01     0.03     0.06     0.00     0.00     0.00     0.00 
##  i72->h7  i73->h7  i74->h7  i75->h7  i76->h7  i77->h7  i78->h7  i79->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h7  i81->h7  i82->h7  i83->h7  i84->h7  i85->h7  i86->h7  i87->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h7  i89->h7  i90->h7  i91->h7  i92->h7  i93->h7  i94->h7  i95->h7 
##     0.00     0.00     0.00    -0.02     0.00     0.00     0.00     0.00 
##  i96->h7  i97->h7  i98->h7  i99->h7 i100->h7 i101->h7 i102->h7 i103->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7 
##     0.00     0.00    -0.29     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o 
## -0.08 -1.17  1.44 -0.08 -0.08  0.02 -0.13  2.70
# Variable-importance plot for the fitted network; 50 is passed as vip's
# second positional argument (the number of features to display).
vip(Adult_TDA_KDE_5.50.5_n4_NN1Fit0, 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n4_NN1Fit TDA-Assisted NN")

# Score the testing data with the model fitted on the training data.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n4_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted vs. observed labels to assess test-set performance,
# then display the result.
ad_tda_kde_5.50.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n4_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7341  1886
##      >50K      75   466
##                                           
##                Accuracy : 0.7992          
##                  95% CI : (0.7912, 0.8071)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.2551          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9899          
##             Specificity : 0.1981          
##          Pos Pred Value : 0.7956          
##          Neg Pred Value : 0.8614          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7515          
##    Detection Prevalence : 0.9446          
##       Balanced Accuracy : 0.5940          
##                                           
##        'Positive' Class :  <=50K          
## 
# Display the confusion matrix again (same object as printed just above).
ad_tda_kde_5.50.5_n4_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7341  1886
##      >50K      75   466
##                                           
##                Accuracy : 0.7992          
##                  95% CI : (0.7912, 0.8071)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.2551          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9899          
##             Specificity : 0.1981          
##          Pos Pred Value : 0.7956          
##          Neg Pred Value : 0.8614          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7515          
##    Detection Prevalence : 0.9446          
##       Balanced Accuracy : 0.5940          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set metrics: accuracy, kappa, accuracy CI bounds and p-values.
ad_tda_kde_5.50.5_n4_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.992424e-01   2.550720e-01   7.911589e-01   8.071472e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   2.052017e-21   0.000000e+00
# Keep the overall test-set accuracy (first element of $overall), picked by
# name.
ad_tda_kde_5.50.5_n4_nn1_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n4_nn1_cf0$overall["Accuracy"]
# Per-class metrics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_kde_5.50.5_n4_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9898867            0.1981293            0.7955999 
##       Neg Pred Value            Precision               Recall 
##            0.8613678            0.7955999            0.9898867 
##                   F1           Prevalence       Detection Rate 
##            0.8821727            0.7592138            0.7515356 
## Detection Prevalence    Balanced Accuracy 
##            0.9446151            0.5940080
# Keep precision, recall and F1 (elements 5 to 7 of $byClass), picked by name.
ad_tda_kde_5.50.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n4_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Fold-wise accuracy difference between the two fits (first minus second).
diff_tda_kde_5.50.5_nn1_n4_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.50.5_n4_nn1_fit_re
diff_tda_kde_5.50.5_nn1_n4_3_fold
##       Accuracy
## 1 -0.020733269
## 2 -0.008496842
## 3 -0.010769893
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# -0.01 and 0.01 are presumably the lower/upper ROPE bounds -- confirm against
# the definition of BayesianSignTest.
bst_tda_kde_5.50.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n4_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; this is Inf whenever probRight is 0 and
# probLeft is positive.
bst_tda_kde_5.50.5_nn1.n4_3_fold_odds.left <-
  bst_tda_kde_5.50.5_nn1.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n4_3_fold[["probRight"]]
bst_tda_kde_5.50.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Same difference matrix and the same two bounds (-0.01, 0.01) as the sign test.
bsr_tda_kde_5.50.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n4_3_fold
## $winLeft
## [1] 0.6112667
## 
## $winRope
## [1] 0.3887333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# 0.1 is the second argument (presumably the correlation between folds --
# confirm against the function definition); -0.01 / 0.01 follow it.
bct_tda_kde_5.50.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n4_3_fold
## $left
## [1] 0.7386676
## 
## $rope
## [1] 0.2448903
## 
## $right
## [1] 0.01644208
# Rope Plot
# Plot the share of the fold-wise differences that lies inside [-0.01, 0.01].
plot(
  rope(
    diff_tda_kde_5.50.5_nn1_n4_3_fold,
    c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold))
#bf_tda_kde_5.50.5_nn1.n4_3_fold

#t_test
# One-sample t-test of the fold-wise differences (default null: mean is 0).
t.test(
  as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold)
## t = -3.5483, df = 2, p-value = 0.07106
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.02950142  0.00283475
## sample estimates:
##   mean of x 
## -0.01333333
### Test set diff
# Single test-set accuracy difference (first minus second).
# NOTE(review): the first term is svm_cf_ov_acc, whereas the 3-fold comparison
# in this section subtracts from ad_nn1_fit_re (the NN fit). This looks like a
# leftover from an SVM section -- confirm which baseline is intended.
diff_tda_kde_5.50.5_nn1.n4_test <-
  svm_cf_ov_acc - ad_tda_kde_5.50.5_n4_nn1_cf0_ov_acc
diff_tda_kde_5.50.5_nn1.n4_test
##   Accuracy 
## 0.02856265
## Bayesian Tests Test set diff

# Bayesian Sign Test
# Applied to the single test-set difference with the bounds -0.01 and 0.01.
bst_tda_kde_5.50.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight taken from the sign-test result.
bst_tda_kde_5.50.5_nn1.n4_test_odds.left <-
  bst_tda_kde_5.50.5_nn1.n4_test[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n4_test[["probRight"]]
bst_tda_kde_5.50.5_nn1.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Applied to the single test-set difference with the bounds -0.01 and 0.01.
bsr_tda_kde_5.50.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1583667
## 
## $winRight
## [1] 0.8416333
# Bayesian Correlated Test
# NOTE(review): the input is a single difference and the call returns NA for
# left/rope/right; presumably a spread cannot be estimated from one value --
# confirm whether this test is meaningful on the test-set difference.
bct_tda_kde_5.50.5_nn1.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nn1.n4_test))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n4_test))
#bf_tda_kde_5.50.5_nn1.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n4_test))

##Node5

#Neural Network 1

# Fit an nnet classifier for adult_df1 on all other columns of the node-5
# data set. The resampling scheme (fitControl) and the tuning grid (nn1Grid)
# are defined elsewhere in the file; the best candidate is chosen by Accuracy.
# NOTE(review): `Importance` is not a documented train() argument and is
# forwarded through `...`; confirm the underlying nnet fit actually uses it.
Adult_TDA_KDE_5.50.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n5.vec,
  Importance = TRUE, # TRUE instead of T: T is an ordinary, reassignable variable
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights:  221
## initial  value 4380.555309 
## iter  10 value 2157.200772
## iter  20 value 2136.771432
## iter  30 value 2008.063300
## iter  40 value 1988.549132
## iter  50 value 1973.538255
## iter  60 value 1965.434521
## iter  70 value 1943.442816
## iter  80 value 1882.459591
## iter  90 value 1664.944270
## iter 100 value 1499.973222
## final  value 1499.973222 
## stopped after 100 iterations
## # weights:  331
## initial  value 3514.825850 
## iter  10 value 2089.025595
## iter  20 value 2008.803558
## iter  30 value 2008.340225
## iter  40 value 1998.802716
## iter  50 value 1993.711994
## iter  60 value 1991.508163
## iter  70 value 1990.224360
## iter  80 value 1989.967123
## final  value 1989.966604 
## converged
## # weights:  551
## initial  value 4536.165854 
## iter  10 value 2157.769272
## iter  20 value 2150.733742
## iter  30 value 2060.439589
## iter  40 value 2054.759531
## iter  50 value 2032.765884
## iter  60 value 1965.508907
## iter  70 value 1751.913072
## iter  80 value 1612.684795
## iter  90 value 1523.102111
## iter 100 value 1446.366786
## final  value 1446.366786 
## stopped after 100 iterations
## # weights:  771
## initial  value 3404.225218 
## iter  10 value 2019.700117
## iter  20 value 1952.667083
## iter  30 value 1745.113401
## iter  40 value 1610.809244
## iter  50 value 1402.996107
## iter  60 value 1364.651790
## iter  70 value 1352.904239
## iter  80 value 1342.568272
## iter  90 value 1338.522050
## iter 100 value 1335.495519
## final  value 1335.495519 
## stopped after 100 iterations
## # weights:  221
## initial  value 2692.167823 
## iter  10 value 2025.158023
## iter  20 value 2002.541997
## iter  30 value 1814.541565
## iter  40 value 1599.393312
## iter  50 value 1530.545797
## iter  60 value 1509.197151
## iter  70 value 1501.003902
## iter  80 value 1452.910845
## iter  90 value 1431.181091
## iter 100 value 1414.272088
## final  value 1414.272088 
## stopped after 100 iterations
## # weights:  331
## initial  value 2619.528371 
## iter  10 value 2003.386145
## iter  20 value 1971.191886
## iter  30 value 1909.400068
## iter  40 value 1866.290734
## iter  50 value 1696.020747
## iter  60 value 1519.047767
## iter  70 value 1417.451978
## iter  80 value 1386.135711
## iter  90 value 1370.909851
## iter 100 value 1366.375956
## final  value 1366.375956 
## stopped after 100 iterations
## # weights:  551
## initial  value 2528.285958 
## iter  10 value 2075.369509
## iter  20 value 2001.464099
## iter  30 value 1998.337990
## iter  40 value 1996.238058
## iter  50 value 1988.324874
## iter  60 value 1907.809262
## iter  70 value 1709.006098
## iter  80 value 1512.656891
## iter  90 value 1455.392647
## iter 100 value 1431.733736
## final  value 1431.733736 
## stopped after 100 iterations
## # weights:  771
## initial  value 2701.350397 
## iter  10 value 2161.190472
## iter  20 value 2158.590882
## iter  30 value 2158.558642
## iter  40 value 2112.977794
## iter  50 value 1938.192211
## iter  60 value 1586.783546
## iter  70 value 1513.163037
## iter  80 value 1457.374281
## iter  90 value 1441.160837
## iter 100 value 1411.404872
## final  value 1411.404872 
## stopped after 100 iterations
## # weights:  221
## initial  value 2408.053046 
## iter  10 value 2159.068957
## iter  20 value 2158.238885
## iter  30 value 2089.152637
## iter  40 value 2054.637114
## iter  50 value 2038.288170
## iter  60 value 1990.181040
## iter  70 value 1971.967624
## iter  80 value 1965.926715
## iter  90 value 1960.741795
## iter 100 value 1956.025554
## final  value 1956.025554 
## stopped after 100 iterations
## # weights:  331
## initial  value 4361.152570 
## iter  10 value 2158.565865
## iter  20 value 2043.305120
## iter  30 value 2009.069500
## iter  40 value 1999.464079
## iter  50 value 1909.627337
## iter  60 value 1750.289500
## iter  70 value 1475.424429
## iter  80 value 1453.086841
## iter  90 value 1451.259056
## iter 100 value 1402.255739
## final  value 1402.255739 
## stopped after 100 iterations
## # weights:  551
## initial  value 5665.089415 
## iter  10 value 2038.442414
## iter  20 value 2030.938404
## iter  30 value 2029.128702
## iter  40 value 2009.729056
## iter  50 value 2007.982530
## iter  60 value 2007.109723
## iter  70 value 2005.107031
## iter  80 value 1935.860991
## iter  90 value 1818.397813
## iter 100 value 1625.467296
## final  value 1625.467296 
## stopped after 100 iterations
## # weights:  771
## initial  value 4000.436587 
## iter  10 value 2035.955879
## iter  20 value 2022.358252
## iter  30 value 2008.790947
## iter  40 value 1991.878364
## iter  50 value 1987.821493
## iter  60 value 1932.153381
## iter  70 value 1824.230974
## iter  80 value 1699.334329
## iter  90 value 1572.655875
## iter 100 value 1498.425695
## final  value 1498.425695 
## stopped after 100 iterations
## # weights:  221
## initial  value 2504.690137 
## iter  10 value 2157.643913
## iter  20 value 2139.349481
## iter  30 value 2096.130398
## iter  40 value 2010.835067
## iter  50 value 1933.945638
## iter  60 value 1906.481855
## iter  70 value 1852.890897
## iter  80 value 1847.592656
## iter  90 value 1630.567064
## iter 100 value 1506.629972
## final  value 1506.629972 
## stopped after 100 iterations
## # weights:  331
## initial  value 3471.197015 
## iter  10 value 2078.498216
## iter  20 value 2003.338160
## iter  30 value 2000.348788
## iter  40 value 2000.328316
## final  value 2000.328200 
## converged
## # weights:  551
## initial  value 3760.798736 
## iter  10 value 2026.078061
## iter  20 value 1988.800931
## iter  30 value 1981.092025
## iter  40 value 1963.829479
## iter  50 value 1963.308508
## iter  60 value 1952.238986
## iter  70 value 1943.954518
## iter  80 value 1932.455433
## iter  90 value 1802.352825
## iter 100 value 1492.742762
## final  value 1492.742762 
## stopped after 100 iterations
## # weights:  771
## initial  value 5637.712107 
## iter  10 value 2031.692297
## iter  20 value 1979.474055
## iter  30 value 1954.483910
## iter  40 value 1918.608040
## iter  50 value 1864.462474
## iter  60 value 1798.900650
## iter  70 value 1592.764573
## iter  80 value 1452.235041
## iter  90 value 1397.859937
## iter 100 value 1386.782706
## final  value 1386.782706 
## stopped after 100 iterations
## # weights:  221
## initial  value 3398.111972 
## iter  10 value 2030.918059
## iter  20 value 2013.828491
## iter  30 value 2005.991306
## iter  40 value 1976.604078
## iter  50 value 1831.421867
## iter  60 value 1515.106640
## iter  70 value 1474.370091
## iter  80 value 1419.717761
## iter  90 value 1399.861306
## iter 100 value 1361.397061
## final  value 1361.397061 
## stopped after 100 iterations
## # weights:  331
## initial  value 2739.331892 
## iter  10 value 2140.093965
## iter  20 value 2064.592327
## iter  30 value 2057.197058
## iter  40 value 2018.011101
## iter  50 value 1988.375691
## iter  60 value 1983.456966
## iter  70 value 1982.982158
## iter  80 value 1979.731602
## iter  90 value 1893.861236
## iter 100 value 1744.766959
## final  value 1744.766959 
## stopped after 100 iterations
## # weights:  551
## initial  value 3737.702977 
## iter  10 value 2147.153698
## iter  20 value 2034.668818
## iter  30 value 2019.939108
## iter  40 value 2018.555230
## iter  50 value 1991.592019
## iter  60 value 1911.890654
## iter  70 value 1797.716304
## iter  80 value 1768.944402
## iter  90 value 1659.816705
## iter 100 value 1516.902187
## final  value 1516.902187 
## stopped after 100 iterations
## # weights:  771
## initial  value 4869.290838 
## iter  10 value 2149.252094
## iter  20 value 2141.312975
## iter  30 value 2141.188052
## iter  40 value 2012.984186
## iter  50 value 1994.607218
## iter  60 value 1988.611143
## iter  70 value 1982.341249
## iter  80 value 1968.062930
## iter  90 value 1962.695092
## iter 100 value 1949.801118
## final  value 1949.801118 
## stopped after 100 iterations
## # weights:  221
## initial  value 4417.828990 
## iter  10 value 2162.923309
## iter  20 value 2160.507500
## iter  30 value 2160.463759
## iter  40 value 2036.944225
## iter  50 value 2002.387159
## iter  60 value 1938.472402
## iter  70 value 1834.617569
## iter  80 value 1735.712937
## iter  90 value 1534.652755
## iter 100 value 1443.666918
## final  value 1443.666918 
## stopped after 100 iterations
## # weights:  331
## initial  value 5770.462788 
## iter  10 value 2156.318761
## iter  20 value 2152.603018
## iter  30 value 2026.959523
## iter  40 value 1755.754664
## iter  50 value 1668.813315
## iter  60 value 1431.635754
## iter  70 value 1396.968555
## iter  80 value 1389.392470
## iter  90 value 1382.678508
## iter 100 value 1376.545022
## final  value 1376.545022 
## stopped after 100 iterations
## # weights:  551
## initial  value 3929.810502 
## iter  10 value 2159.430694
## iter  20 value 2158.077747
## iter  30 value 2158.062149
## final  value 2158.062033 
## converged
## # weights:  771
## initial  value 2640.382834 
## iter  10 value 2087.471045
## iter  20 value 2005.490969
## iter  30 value 1992.216901
## iter  40 value 1990.064644
## iter  50 value 1980.781323
## iter  60 value 1966.444964
## iter  70 value 1787.320069
## iter  80 value 1595.363469
## iter  90 value 1515.326714
## iter 100 value 1483.300397
## final  value 1483.300397 
## stopped after 100 iterations
## # weights:  221
## initial  value 3333.132016 
## iter  10 value 2025.840440
## iter  20 value 2021.560910
## iter  30 value 2019.575744
## iter  40 value 2017.087846
## iter  50 value 2009.711444
## iter  60 value 1984.735809
## iter  70 value 1973.863716
## iter  80 value 1954.179000
## iter  90 value 1946.336430
## iter 100 value 1694.432985
## final  value 1694.432985 
## stopped after 100 iterations
## # weights:  331
## initial  value 2641.981451 
## iter  10 value 2159.690080
## iter  20 value 2158.217901
## iter  30 value 2108.428371
## iter  40 value 2036.789978
## iter  50 value 2030.136373
## iter  60 value 2005.012307
## iter  70 value 1850.870902
## iter  80 value 1651.702846
## iter  90 value 1506.914267
## iter 100 value 1487.734658
## final  value 1487.734658 
## stopped after 100 iterations
## # weights:  551
## initial  value 2631.571977 
## iter  10 value 2066.877330
## iter  20 value 2009.657783
## iter  30 value 1990.181462
## iter  40 value 1885.814984
## iter  50 value 1686.955939
## iter  60 value 1586.066984
## iter  70 value 1444.701000
## iter  80 value 1418.707296
## iter  90 value 1418.074196
## iter 100 value 1396.709796
## final  value 1396.709796 
## stopped after 100 iterations
## # weights:  771
## initial  value 10049.454894 
## iter  10 value 2145.880098
## iter  20 value 2057.682078
## iter  30 value 1982.481328
## iter  40 value 1948.522216
## iter  50 value 1862.203402
## iter  60 value 1858.655962
## iter  70 value 1740.297559
## iter  80 value 1507.627273
## iter  90 value 1478.804216
## iter 100 value 1464.176905
## final  value 1464.176905 
## stopped after 100 iterations
## # weights:  221
## initial  value 4413.458192 
## iter  10 value 2161.364644
## iter  20 value 2159.844151
## iter  30 value 2159.739362
## final  value 2159.739223 
## converged
## # weights:  331
## initial  value 3963.492731 
## iter  10 value 2037.691119
## iter  20 value 2012.514770
## iter  30 value 1992.766271
## iter  40 value 1937.271315
## iter  50 value 1724.787835
## iter  60 value 1580.636685
## iter  70 value 1492.128061
## iter  80 value 1468.181366
## iter  90 value 1426.053434
## iter 100 value 1389.949247
## final  value 1389.949247 
## stopped after 100 iterations
## # weights:  551
## initial  value 6250.991290 
## iter  10 value 2090.701735
## iter  20 value 2013.977159
## iter  30 value 1997.215547
## iter  40 value 1990.539971
## iter  50 value 1985.862750
## iter  60 value 1980.338522
## iter  70 value 1975.648474
## iter  80 value 1972.441812
## iter  90 value 1971.573603
## iter 100 value 1971.407792
## final  value 1971.407792 
## stopped after 100 iterations
## # weights:  771
## initial  value 4164.337088 
## iter  10 value 2059.286435
## iter  20 value 1998.593424
## iter  30 value 1507.246475
## iter  40 value 1418.297931
## iter  50 value 1380.504695
## iter  60 value 1361.812422
## iter  70 value 1353.939485
## iter  80 value 1348.997327
## iter  90 value 1342.885919
## iter 100 value 1333.149179
## final  value 1333.149179 
## stopped after 100 iterations
## # weights:  221
## initial  value 6012.600004 
## iter  10 value 2160.271632
## iter  20 value 2141.458587
## iter  30 value 2044.511163
## iter  40 value 1962.413351
## iter  50 value 1669.366623
## iter  60 value 1572.982507
## iter  70 value 1525.050207
## iter  80 value 1432.397493
## iter  90 value 1392.762683
## iter 100 value 1380.619870
## final  value 1380.619870 
## stopped after 100 iterations
## # weights:  331
## initial  value 2409.581373 
## iter  10 value 2035.056273
## iter  20 value 2019.657824
## iter  30 value 2019.145037
## iter  40 value 2018.992402
## final  value 2018.990567 
## converged
## # weights:  551
## initial  value 4443.038008 
## iter  10 value 2164.832428
## iter  20 value 2160.327999
## iter  30 value 2157.576348
## iter  40 value 2041.009165
## iter  50 value 2036.444743
## iter  60 value 2032.571473
## iter  70 value 2031.850003
## iter  80 value 2022.871134
## iter  90 value 2020.151944
## iter 100 value 2019.944731
## final  value 2019.944731 
## stopped after 100 iterations
## # weights:  771
## initial  value 7803.890149 
## iter  10 value 2057.420033
## iter  20 value 2045.146798
## iter  30 value 2015.976940
## iter  40 value 1990.015911
## iter  50 value 1986.657525
## iter  60 value 1981.740359
## iter  70 value 1973.218389
## iter  80 value 1942.504964
## iter  90 value 1863.992888
## iter 100 value 1830.428985
## final  value 1830.428985 
## stopped after 100 iterations
## # weights:  771
## initial  value 4796.789636 
## iter  10 value 3217.733477
## iter  20 value 3028.559525
## iter  30 value 2956.919815
## iter  40 value 2819.903214
## iter  50 value 2670.976447
## iter  60 value 2496.590839
## iter  70 value 2481.830564
## iter  80 value 2423.205114
## iter  90 value 2185.694959
## iter 100 value 2072.161532
## final  value 2072.161532 
## stopped after 100 iterations
Adult_TDA_KDE_5.50.5_n5_NN1Fit0
## Neural Network 
## 
## 7540 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 5027, 5026, 5027 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   2     0.3    0.8641914  0.1982213
##   2     0.5    0.8623328  0.2523514
##   2     0.7    0.8652527  0.2552265
##   3     0.3    0.8622020  0.1884833
##   3     0.5    0.8668446  0.2925104
##   3     0.7    0.8664450  0.3367942
##   5     0.3    0.8659154  0.2781519
##   5     0.5    0.8635282  0.2920675
##   5     0.7    0.8578265  0.1491520
##   7     0.3    0.8680374  0.3510028
##   7     0.5    0.8680380  0.3352310
##   7     0.7    0.8571630  0.2546601
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.5.
Adult_TDA_KDE_5.50.5_n5_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8611774 0.1749223    Fold2
## 2 0.8647035 0.4110519    Fold1
## 3 0.8782332 0.4197190    Fold3
# Keep only the per-fold Accuracy column of the CV resamples (one-column data frame)
ad_tda_kde_5.50.5_n5_nn1_fit_re <- Adult_TDA_KDE_5.50.5_n5_NN1Fit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.50.5_n5_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting  decay=0.5
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##    -0.36     0.10    -0.76     1.46    -0.24    -0.05    -0.17     0.44 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##    -0.73    -0.23    -0.09     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     1.30     0.00     0.00    -1.51 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00    -0.15    -0.79    -0.69     0.68     1.68 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##    -0.08    -1.00    -0.92    -0.03    -0.80    -0.02    -0.09    -0.12 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.68    -1.18    -1.01    -0.29    -0.96    -0.31     1.67     0.65 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.22     1.78    -0.57     0.30    -0.25    -0.64    -1.20    -0.58 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     2.01    -1.37     1.02     0.35    -0.96     0.61    -0.57     0.21 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.06     0.16    -0.11    -0.16     0.66    -0.42 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.74    -0.47     0.31    -0.61     0.01    -0.03     0.17    -0.09 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.35    -0.11     0.00    -0.08    -0.29    -0.07    -0.16     0.11 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.05    -0.24     0.89    -0.47    -0.27    -0.57    -0.30    -0.06 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##    -0.14     0.80    -0.29     0.05    -0.50    -0.19     0.08    -0.16 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##    -0.05    -0.22     0.62     0.21     0.45 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##     0.05     0.05    -0.03     0.19     0.04     0.00    -0.27     0.53 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##    -0.31    -0.10     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.00     0.00     0.00     0.00     0.07     0.00     0.00     0.42 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.00     0.00     0.00    -0.44     0.17    -0.85    -0.09     0.04 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.01     1.11    -0.13    -0.04    -0.02     0.61     0.01    -0.73 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##     0.16     0.11    -0.22     0.35     0.18     0.07    -0.38    -0.31 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##     0.20    -0.01     0.04     0.06    -0.45     0.11     0.75    -0.42 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##     0.00    -0.06     0.11    -0.29     0.04     0.25     0.54    -0.49 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.00     0.00     0.08     0.14    -0.01     0.08     0.00    -0.01 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##    -0.09     0.00     0.04     0.03     0.05     0.00     0.03     0.00 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##    -0.07     0.02     0.00     0.00    -0.01     0.00    -0.02     0.03 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##    -0.01     0.08    -0.06    -0.02    -0.01    -0.28     0.00     0.01 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.00     0.11     0.04     0.00     0.14    -0.01    -0.01     0.00 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##    -0.01     0.00    -0.14     0.01     0.03 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h6   i1->h6   i2->h6   i3->h6   i4->h6   i5->h6   i6->h6   i7->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h6   i9->h6  i10->h6  i11->h6  i12->h6  i13->h6  i14->h6  i15->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h6  i17->h6  i18->h6  i19->h6  i20->h6  i21->h6  i22->h6  i23->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h6  i25->h6  i26->h6  i27->h6  i28->h6  i29->h6  i30->h6  i31->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h6  i33->h6  i34->h6  i35->h6  i36->h6  i37->h6  i38->h6  i39->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h6  i41->h6  i42->h6  i43->h6  i44->h6  i45->h6  i46->h6  i47->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h6  i49->h6  i50->h6  i51->h6  i52->h6  i53->h6  i54->h6  i55->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h6  i57->h6  i58->h6  i59->h6  i60->h6  i61->h6  i62->h6  i63->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h6  i65->h6  i66->h6  i67->h6  i68->h6  i69->h6  i70->h6  i71->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h6  i73->h6  i74->h6  i75->h6  i76->h6  i77->h6  i78->h6  i79->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h6  i81->h6  i82->h6  i83->h6  i84->h6  i85->h6  i86->h6  i87->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h6  i89->h6  i90->h6  i91->h6  i92->h6  i93->h6  i94->h6  i95->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h6  i97->h6  i98->h6  i99->h6 i100->h6 i101->h6 i102->h6 i103->h6 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h7   i1->h7   i2->h7   i3->h7   i4->h7   i5->h7   i6->h7   i7->h7 
##    -0.01    -0.15    -0.01    -0.01    -0.05     0.00    -0.11     0.09 
##   i8->h7   i9->h7  i10->h7  i11->h7  i12->h7  i13->h7  i14->h7  i15->h7 
##     0.04     0.03     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h7  i17->h7  i18->h7  i19->h7  i20->h7  i21->h7  i22->h7  i23->h7 
##     0.00     0.00     0.00     0.00    -0.03     0.00     0.00    -0.16 
##  i24->h7  i25->h7  i26->h7  i27->h7  i28->h7  i29->h7  i30->h7  i31->h7 
##     0.00     0.00     0.00     0.17    -0.02    -0.10     0.00     0.11 
##  i32->h7  i33->h7  i34->h7  i35->h7  i36->h7  i37->h7  i38->h7  i39->h7 
##     0.00     0.01    -0.03     0.00    -0.01     0.02     0.00     0.00 
##  i40->h7  i41->h7  i42->h7  i43->h7  i44->h7  i45->h7  i46->h7  i47->h7 
##     0.01     0.00    -0.08     0.03     0.10     0.00    -0.07    -0.03 
##  i48->h7  i49->h7  i50->h7  i51->h7  i52->h7  i53->h7  i54->h7  i55->h7 
##     0.09     0.03    -0.12    -0.09    -0.13    -0.01    -0.05     0.05 
##  i56->h7  i57->h7  i58->h7  i59->h7  i60->h7  i61->h7  i62->h7  i63->h7 
##     0.22    -0.05     0.08     0.00     0.00    -0.04     0.23    -0.24 
##  i64->h7  i65->h7  i66->h7  i67->h7  i68->h7  i69->h7  i70->h7  i71->h7 
##    -0.01    -0.04     0.44     0.11     0.00    -0.01     0.00     0.00 
##  i72->h7  i73->h7  i74->h7  i75->h7  i76->h7  i77->h7  i78->h7  i79->h7 
##     0.11     0.00     0.00    -0.07     0.02     0.00     0.00     0.00 
##  i80->h7  i81->h7  i82->h7  i83->h7  i84->h7  i85->h7  i86->h7  i87->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00    -0.01     0.00 
##  i88->h7  i89->h7  i90->h7  i91->h7  i92->h7  i93->h7  i94->h7  i95->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h7  i97->h7  i98->h7  i99->h7 i100->h7 i101->h7 i102->h7 i103->h7 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7 
##     0.00     0.00    -0.16     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o 
## -0.52  4.52 -0.04 -0.48 -1.75 -0.38 -0.48 -0.88
# Variable-importance plot for the TDA-assisted neural network, limited to
# 50 features. The plot title previously misspelled "Assisted".
vip(Adult_TDA_KDE_5.50.5_n5_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n5_NN1Fit TDA-Assisted NN")

# Score the testing data with the TDA-assisted neural network fit
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n5_NN1Fit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate predicted classes against the observed test labels
ad_tda_kde_5.50.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n5_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6864  1660
##      >50K     552   692
##                                           
##                Accuracy : 0.7735          
##                  95% CI : (0.7651, 0.7818)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.0004467       
##                                           
##                   Kappa : 0.2619          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9256          
##             Specificity : 0.2942          
##          Pos Pred Value : 0.8053          
##          Neg Pred Value : 0.5563          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7027          
##    Detection Prevalence : 0.8726          
##       Balanced Accuracy : 0.6099          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n5_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6864  1660
##      >50K     552   692
##                                           
##                Accuracy : 0.7735          
##                  95% CI : (0.7651, 0.7818)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.0004467       
##                                           
##                   Kappa : 0.2619          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9256          
##             Specificity : 0.2942          
##          Pos Pred Value : 0.8053          
##          Neg Pred Value : 0.5563          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7027          
##    Detection Prevalence : 0.8726          
##       Balanced Accuracy : 0.6099          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n5_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.735463e-01   2.619098e-01   7.651142e-01   7.818150e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   4.466916e-04  1.697207e-122
# Overall test-set accuracy (first entry of $overall), kept as a named value
ad_tda_kde_5.50.5_n5_nn1_cf0_ov_acc <- ad_tda_kde_5.50.5_n5_nn1_cf0$overall["Accuracy"]
ad_tda_kde_5.50.5_n5_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9255663            0.2942177            0.8052557 
##       Neg Pred Value            Precision               Recall 
##            0.5562701            0.8052557            0.9255663 
##                   F1           Prevalence       Detection Rate 
##            0.8612296            0.7592138            0.7027027 
## Detection Prevalence    Balanced Accuracy 
##            0.8726454            0.6098920
# Precision, Recall and F1 (entries 5 to 7 of $byClass), selected by name
ad_tda_kde_5.50.5_n5_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n5_nn1_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers

### 3-fold diff

# Row-wise accuracy gap between ad_nn1_fit_re and the TDA-assisted NN resamples.
# NOTE(review): the subtraction pairs rows by position; the TDA-assisted
# resample table prints its rows as Fold2, Fold1, Fold3 - confirm both
# operands list folds in the same order.
diff_tda_kde_5.50.5_nn1_n5_3_fold <- ad_nn1_fit_re - ad_tda_kde_5.50.5_n5_nn1_fit_re
diff_tda_kde_5.50.5_nn1_n5_3_fold
##      Accuracy
## 1 -0.03253829
## 2 -0.01844844
## 3 -0.07131031
## Bayesian Tests 3-fold diff

# Bayesian sign test on the 3-fold differences, bounds -0.01 and 0.01
bst_tda_kde_5.50.5_nn1.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test (probLeft / probRight);
# the ratio is Inf whenever probRight is 0.
bst_tda_kde_5.50.5_nn1.n5_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_nn1.n5_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.50.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the 3-fold differences, bounds -0.01 and 0.01
bsr_tda_kde_5.50.5_nn1.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9645
## 
## $winRope
## [1] 0.0355
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the 3-fold differences
# (second argument 0.1, then bounds -0.01 and 0.01)
bct_tda_kde_5.50.5_nn1.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n5_3_fold
## $left
## [1] 0.8830625
## 
## $rope
## [1] 0.06264481
## 
## $right
## [1] 0.05429269
# ROPE plot of the 3-fold differences over the interval [-0.01, 0.01]
plot(rope(diff_tda_kde_5.50.5_nn1_n5_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold))
#bf_tda_kde_5.50.5_nn1.n5_3_fold

# One-sample t-test of the 3-fold accuracy differences (null: true mean equals 0)
t.test(as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold)
## t = -2.5793, df = 2, p-value = 0.1231
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.10876765  0.02723628
## sample estimates:
##   mean of x 
## -0.04076568
### Test set diff
# Difference in overall test-set accuracy: svm_cf_ov_acc minus the TDA-assisted NN.
# NOTE(review): this section compares neural networks (the 3-fold diff uses
# ad_nn1_fit_re), yet the baseline here is svm_cf_ov_acc - confirm that the
# SVM accuracy is the intended baseline and not a copy-paste leftover.
diff_tda_kde_5.50.5_nn1.n5_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n5_nn1_cf0_ov_acc)
diff_tda_kde_5.50.5_nn1.n5_test
##  Accuracy 
## 0.0542588
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set difference, bounds -0.01 and 0.01
bst_tda_kde_5.50.5_nn1.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the test-set sign test (probLeft / probRight)
bst_tda_kde_5.50.5_nn1.n5_test_odds.left <- with(
  bst_tda_kde_5.50.5_nn1.n5_test,
  probLeft / probRight
)
bst_tda_kde_5.50.5_nn1.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the n5 test-set difference, bounds -0.01 and 0.01.
# Both the input and the result carry the n5 suffix, matching the other tests in
# this section; the previous n4 names re-tested the n4 difference and overwrote
# the n4 result.
bsr_tda_kde_5.50.5_nn1.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1550667
## 
## $winRight
## [1] 0.8449333
# Bayesian Correlated Test
# The input here is a single test-set difference; the printed result shows
# left, rope and right all as NA.

bct_tda_kde_5.50.5_nn1.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1.n5_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nn1.n5_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n5_test))
#bf_tda_kde_5.50.5_nn1.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n5_test)) 


## Logistic regression (non-TDA-assisted)

# caret-wrapped binomial glm on the one-hot training frame, resampled with
# fitControl and summarised on Accuracy
adultLrFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = 'glm',
  family = 'binomial',
  metric = 'Accuracy',
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
adultLrFit
## Generalized Linear Model 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15196, 15195, 15195 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8500417  0.5639457
adultLrFit$resample
##    Accuracy     Kappa Resample
## 1 0.8511254 0.5647397    Fold1
## 2 0.8494341 0.5602067    Fold2
## 3 0.8495657 0.5668908    Fold3
# Keep the per-fold Accuracy column (first column of the resampling table)
ad_lr_fit_re <- adultLrFit$resample["Accuracy"]

# Coefficient table of the final glm fit
summary(adultLrFit)
## 
## Call:
## NULL
## 
## Coefficients: (10 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     2.684e+12  3.378e+12   0.794 0.427012    
## V1                              2.493e-02  1.973e-03  12.639  < 2e-16 ***
## V2..                           -2.684e+12  3.378e+12  -0.794 0.427012    
## V2.Federal.gov                 -2.684e+12  3.378e+12  -0.794 0.427012    
## V2.Local.gov                   -2.684e+12  3.378e+12  -0.794 0.427012    
## V2.Never.worked                -2.684e+12  3.378e+12  -0.794 0.427012    
## V2.Private                     -2.684e+12  3.378e+12  -0.794 0.427012    
## V2.Self.emp.inc                -2.684e+12  3.378e+12  -0.794 0.427012    
## V2.Self.emp.not.inc            -2.684e+12  3.378e+12  -0.794 0.427012    
## V2.State.gov                   -2.684e+12  3.378e+12  -0.794 0.427012    
## V2.Without.pay                 -2.684e+12  3.378e+12  -0.794 0.427012    
## V3                              4.279e-07  2.065e-07   2.072 0.038232 *  
## V4.10th                        -1.141e+00  1.892e-01  -6.032 1.62e-09 ***
## V4.11th                        -1.118e+00  1.822e-01  -6.137 8.41e-10 ***
## V4.12th                        -7.823e-01  2.673e-01  -2.927 0.003426 ** 
## V4.1st.4th                     -1.690e+00  5.344e-01  -3.162 0.001568 ** 
## V4.5th.6th                     -1.812e+00  3.874e-01  -4.678 2.90e-06 ***
## V4.7th.8th                     -1.769e+00  2.324e-01  -7.613 2.68e-14 ***
## V4.9th                         -1.323e+00  2.456e-01  -5.388 7.12e-08 ***
## V4.Assoc.acdm                   8.067e-02  1.155e-01   0.698 0.484945    
## V4.Assoc.voc                    2.085e-01  1.025e-01   2.035 0.041860 *  
## V4.Bachelors                    6.892e-01  6.664e-02  10.343  < 2e-16 ***
## V4.Doctorate                    1.930e+00  1.933e-01   9.981  < 2e-16 ***
## V4.HS.grad                     -3.922e-01  5.994e-02  -6.543 6.03e-11 ***
## V4.Masters                      1.038e+00  9.727e-02  10.669  < 2e-16 ***
## V4.Preschool                   -3.140e+01  3.838e+04  -0.001 0.999347    
## V4.Prof.school                  1.615e+00  1.577e-01  10.237  < 2e-16 ***
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                     9.411e-02  1.947e-01   0.483 0.628925    
## V6.Married.AF.spouse            2.864e+00  6.256e-01   4.578 4.68e-06 ***
## V6.Married.civ.spouse           2.435e+00  3.708e-01   6.568 5.10e-11 ***
## V6.Married.spouse.absent        3.207e-02  3.186e-01   0.101 0.919825    
## V6.Never.married               -4.464e-01  2.026e-01  -2.203 0.027592 *  
## V6.Separated                   -9.802e-03  2.589e-01  -0.038 0.969796    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                -1.271e-02  1.203e-01  -0.106 0.915873    
## V7.Armed.Forces                -8.642e-01  1.684e+00  -0.513 0.607920    
## V7.Craft.repair                 1.696e-01  1.010e-01   1.679 0.093107 .  
## V7.Exec.managerial              8.454e-01  1.047e-01   8.074 6.80e-16 ***
## V7.Farming.fishing             -9.421e-01  1.702e-01  -5.535 3.11e-08 ***
## V7.Handlers.cleaners           -6.985e-01  1.756e-01  -3.978 6.95e-05 ***
## V7.Machine.op.inspct           -1.907e-01  1.281e-01  -1.489 0.136533    
## V7.Other.service               -7.682e-01  1.474e-01  -5.213 1.86e-07 ***
## V7.Priv.house.serv             -3.825e+00  1.641e+00  -2.331 0.019736 *  
## V7.Prof.specialty               5.982e-01  1.120e-01   5.343 9.13e-08 ***
## V7.Protective.serv              6.015e-01  1.567e-01   3.839 0.000124 ***
## V7.Sales                        2.929e-01  1.080e-01   2.712 0.006679 ** 
## V7.Tech.support                 7.197e-01  1.422e-01   5.063 4.13e-07 ***
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -1.425e+00  1.238e-01 -11.510  < 2e-16 ***
## V8.Not.in.family               -6.970e-01  3.375e-01  -2.065 0.038906 *  
## V8.Other.relative              -1.913e+00  3.100e-01  -6.171 6.80e-10 ***
## V8.Own.child                   -2.032e+00  3.295e-01  -6.166 7.00e-10 ***
## V8.Unmarried                   -8.062e-01  3.481e-01  -2.316 0.020566 *  
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo          -6.055e-01  2.765e-01  -2.190 0.028544 *  
## V9.Asian.Pac.Islander           4.559e-02  1.810e-01   0.252 0.801159    
## V9.Black                       -1.676e-01  9.204e-02  -1.821 0.068598 .  
## V9.Other                       -3.852e-01  3.374e-01  -1.142 0.253648    
## V9.White                               NA         NA      NA       NA    
## V10.Female                     -8.607e-01  9.598e-02  -8.967  < 2e-16 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             3.158e-04  1.256e-05  25.152  < 2e-16 ***
## V12                             6.765e-04  4.501e-05  15.029  < 2e-16 ***
## V13                             3.112e-02  1.952e-03  15.942  < 2e-16 ***
## V14..                          -1.885e+00  1.053e+00  -1.791 0.073256 .  
## V14.Cambodia                    2.758e-01  1.350e+00   0.204 0.838087    
## V14.Canada                     -1.634e+00  1.093e+00  -1.495 0.134823    
## V14.China                      -2.613e+00  1.131e+00  -2.310 0.020907 *  
## V14.Columbia                   -3.697e+00  1.349e+00  -2.742 0.006115 ** 
## V14.Cuba                       -1.336e+00  1.104e+00  -1.211 0.226028    
## V14.Dominican.Republic         -3.222e+00  1.483e+00  -2.173 0.029789 *  
## V14.Ecuador                    -1.724e+00  1.278e+00  -1.349 0.177185    
## V14.El.Salvador                -2.675e+00  1.205e+00  -2.221 0.026351 *  
## V14.England                    -1.564e+00  1.090e+00  -1.435 0.151364    
## V14.France                     -1.132e+00  1.196e+00  -0.946 0.344143    
## V14.Germany                    -1.175e+00  1.084e+00  -1.084 0.278365    
## V14.Greece                     -2.642e+00  1.230e+00  -2.149 0.031668 *  
## V14.Guatemala                  -2.477e+00  1.422e+00  -1.742 0.081480 .  
## V14.Haiti                      -2.242e+00  1.396e+00  -1.606 0.108362    
## V14.Holand.Netherlands                 NA         NA      NA       NA    
## V14.Honduras                   -2.969e+00  2.845e+00  -1.044 0.296702    
## V14.Hong                       -2.262e+00  1.578e+00  -1.434 0.151697    
## V14.Hungary                    -2.349e+00  1.386e+00  -1.694 0.090285 .  
## V14.India                      -1.652e+00  1.109e+00  -1.490 0.136334    
## V14.Iran                       -1.307e+00  1.159e+00  -1.128 0.259404    
## V14.Ireland                    -9.512e-01  1.240e+00  -0.767 0.443153    
## V14.Italy                      -7.117e-01  1.104e+00  -0.645 0.518997    
## V14.Jamaica                    -1.563e+00  1.164e+00  -1.342 0.179615    
## V14.Japan                      -1.364e+00  1.138e+00  -1.199 0.230650    
## V14.Laos                       -2.871e+00  1.565e+00  -1.834 0.066688 .  
## V14.Mexico                     -2.193e+00  1.071e+00  -2.047 0.040629 *  
## V14.Nicaragua                  -2.310e+00  1.522e+00  -1.518 0.129041    
## V14.Outlying.US.Guam.USVI.etc. -2.601e+01  8.470e+04   0.000 0.999755    
## V14.Peru                       -2.723e+00  1.500e+00  -1.816 0.069372 .  
## V14.Philippines                -9.092e-01  1.080e+00  -0.842 0.399974    
## V14.Poland                     -1.759e+00  1.133e+00  -1.552 0.120553    
## V14.Portugal                   -1.543e+00  1.223e+00  -1.261 0.207218    
## V14.Puerto.Rico                -2.205e+00  1.122e+00  -1.965 0.049435 *  
## V14.Scotland                   -1.943e+00  1.414e+00  -1.374 0.169322    
## V14.South                      -2.295e+00  1.150e+00  -1.996 0.045984 *  
## V14.Taiwan                     -1.875e+00  1.193e+00  -1.572 0.115858    
## V14.Thailand                   -2.214e+00  1.362e+00  -1.625 0.104123    
## V14.Trinadad.Tobago            -9.988e-01  1.443e+00  -0.692 0.488746    
## V14.United.States              -1.550e+00  1.039e+00  -1.492 0.135814    
## V14.Vietnam                    -3.188e+00  1.322e+00  -2.411 0.015894 *  
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 25165  on 22792  degrees of freedom
## Residual deviance: 14361  on 22694  degrees of freedom
## AIC: 14559
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot restricted to 25 features
vip(adultLrFit, num_features = 25) + ggtitle('non-TDA-Assisted LR')

# Predict the test rows with the model trained on the training data
predictions <- predict(adultLrFit, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the test-set predictions against the observed labels
lr_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
lr_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6893   917
##      >50K     523  1435
##                                           
##                Accuracy : 0.8526          
##                  95% CI : (0.8454, 0.8596)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5723          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9295          
##             Specificity : 0.6101          
##          Pos Pred Value : 0.8826          
##          Neg Pred Value : 0.7329          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7057          
##    Detection Prevalence : 0.7995          
##       Balanced Accuracy : 0.7698          
##                                           
##        'Positive' Class :  <=50K          
## 
lr_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.525799e-01   5.723299e-01   8.453930e-01   8.595557e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  5.445581e-115   3.911751e-25
# Overall test-set accuracy (first entry of the overall statistics vector)
lr_cf_ov_acc <- lr_cf$overall["Accuracy"]
lr_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9294768            0.6101190            0.8825864 
##       Neg Pred Value            Precision               Recall 
##            0.7328907            0.8825864            0.9294768 
##                   F1           Prevalence       Detection Rate 
##            0.9054249            0.7592138            0.7056716 
## Detection Prevalence    Balanced Accuracy 
##            0.7995495            0.7697979
# Entries 5 to 7 of the by-class statistics: Precision, Recall and F1
lr_cf_pre_rec_f1 <- lr_cf$byClass[c("Precision", "Recall", "F1")]


## With TDA PCA filter: 5 intervals, 50% overlap, 5 bins
## Node 1

# The node-1 model is fitted once, through caret::train(). A separate glm()
# fit assigned to this same name would be overwritten by train() before being
# used, so only the train() call is kept (as in the Node 2 section).
Adult_TDA_PC_5.50.5_n1_LrFit0 <- train(as.factor(adult_df1) ~ ., 
                 data = tda.m_adult_5.50.5.n1.vec, 
                    family = 'binomial',
                          method = 'glm', 
                    trControl = fitControl,
                          metric='Accuracy')
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_PC_5.50.5_n1_LrFit0
## Generalized Linear Model 
## 
## 4917 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 3279, 3278, 3277 
## Resampling results:
## 
##   Accuracy  Kappa     
##   0.857161  0.03390148
Adult_TDA_PC_5.50.5_n1_LrFit0$resample
##    Accuracy        Kappa Resample
## 1 0.6300366  0.073586480    Fold1
## 2 0.9719341 -0.002339865    Fold2
## 3 0.9695122  0.030457813    Fold3
# Keep the per-fold Accuracy column (first column of the resampling table)
ad_tda_pc_5.50.5_n1_lr_fit_re <-
  Adult_TDA_PC_5.50.5_n1_LrFit0$resample["Accuracy"]

# Coefficient table of the final glm fit for node 1
summary(Adult_TDA_PC_5.50.5_n1_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (25 not defined because of singularities)
##                                  Estimate Std. Error    z value Pr(>|z|)    
## (Intercept)                     6.754e+15  1.035e+08   65242260   <2e-16 ***
## V1                             -4.021e+13  1.001e+05 -401739559   <2e-16 ***
## V2..                            1.555e+15  1.144e+07  135880986   <2e-16 ***
## V2.Federal.gov                 -8.210e+14  6.396e+06 -128368422   <2e-16 ***
## V2.Local.gov                   -3.322e+14  5.648e+06  -58813598   <2e-16 ***
## V2.Never.worked                        NA         NA         NA       NA    
## V2.Private                      2.257e+14  4.636e+06   48689222   <2e-16 ***
## V2.Self.emp.inc                -1.030e+15  5.363e+06 -192126826   <2e-16 ***
## V2.Self.emp.not.inc            -1.764e+15  5.342e+06 -330201149   <2e-16 ***
## V2.State.gov                           NA         NA         NA       NA    
## V2.Without.pay                         NA         NA         NA       NA    
## V3                              1.936e+08  9.807e+00   19741148   <2e-16 ***
## V4.10th                         5.343e+14  1.892e+07   28232875   <2e-16 ***
## V4.11th                         1.643e+15  2.266e+07   72471752   <2e-16 ***
## V4.12th                        -5.781e+14  3.020e+07  -19138755   <2e-16 ***
## V4.1st.4th                      1.762e+15  6.954e+07   25333567   <2e-16 ***
## V4.5th.6th                      1.178e+15  3.942e+07   29884200   <2e-16 ***
## V4.7th.8th                      1.870e+15  1.548e+07  120788873   <2e-16 ***
## V4.9th                          1.995e+15  3.027e+07   65929241   <2e-16 ***
## V4.Assoc.acdm                  -6.403e+14  5.949e+06 -107617549   <2e-16 ***
## V4.Assoc.voc                   -4.419e+14  5.274e+06  -83784354   <2e-16 ***
## V4.Bachelors                   -9.810e+14  3.195e+06 -307070713   <2e-16 ***
## V4.Doctorate                   -2.241e+15  5.430e+06 -412621450   <2e-16 ***
## V4.HS.grad                      1.710e+14  3.533e+06   48411646   <2e-16 ***
## V4.Masters                     -1.096e+15  3.832e+06 -286104624   <2e-16 ***
## V4.Preschool                           NA         NA         NA       NA    
## V4.Prof.school                 -1.241e+15  4.949e+06 -250795563   <2e-16 ***
## V4.Some.college                        NA         NA         NA       NA    
## V5                                     NA         NA         NA       NA    
## V6.Divorced                     2.246e+15  6.983e+07   32166187   <2e-16 ***
## V6.Married.AF.spouse            5.415e+15  1.067e+08   50765144   <2e-16 ***
## V6.Married.civ.spouse           3.561e+15  9.547e+07   37296901   <2e-16 ***
## V6.Married.spouse.absent       -3.913e+14  9.503e+07   -4116981   <2e-16 ***
## V6.Never.married               -1.548e+14  7.763e+07   -1993868   <2e-16 ***
## V6.Separated                           NA         NA         NA       NA    
## V6.Widowed                             NA         NA         NA       NA    
## V7..                                   NA         NA         NA       NA    
## V7.Adm.clerical                -8.947e+13  8.646e+06  -10348260   <2e-16 ***
## V7.Armed.Forces                 1.572e+15  6.759e+07   23257677   <2e-16 ***
## V7.Craft.repair                -3.669e+12  5.511e+06    -665768   <2e-16 ***
## V7.Exec.managerial              3.655e+14  5.294e+06   69045202   <2e-16 ***
## V7.Farming.fishing             -5.738e+14  8.252e+06  -69535707   <2e-16 ***
## V7.Handlers.cleaners            5.386e+14  1.867e+07   28841951   <2e-16 ***
## V7.Machine.op.inspct            7.351e+14  1.057e+07   69529076   <2e-16 ***
## V7.Other.service                2.137e+15  1.938e+07  110275000   <2e-16 ***
## V7.Priv.house.serv                     NA         NA         NA       NA    
## V7.Prof.specialty               3.170e+14  5.589e+06   56730038   <2e-16 ***
## V7.Protective.serv             -5.345e+14  7.672e+06  -69670508   <2e-16 ***
## V7.Sales                        8.977e+14  5.694e+06  157643421   <2e-16 ***
## V7.Tech.support                 7.911e+14  7.616e+06  103877755   <2e-16 ***
## V7.Transport.moving                    NA         NA         NA       NA    
## V8.Husband                     -1.433e+15  1.969e+07  -72760359   <2e-16 ***
## V8.Not.in.family                1.555e+15  7.050e+07   22057608   <2e-16 ***
## V8.Other.relative              -4.701e+13  7.015e+07    -670114   <2e-16 ***
## V8.Own.child                           NA         NA         NA       NA    
## V8.Unmarried                   -5.367e+14  8.790e+07   -6106524   <2e-16 ***
## V8.Wife                                NA         NA         NA       NA    
## V9.Amer.Indian.Eskimo           8.136e+14  2.250e+07   36160676   <2e-16 ***
## V9.Asian.Pac.Islander          -1.535e+15  9.355e+06 -164127666   <2e-16 ***
## V9.Black                       -1.255e+15  7.568e+06 -165859104   <2e-16 ***
## V9.Other                        1.048e+15  2.296e+07   45649335   <2e-16 ***
## V9.White                               NA         NA         NA       NA    
## V10.Female                             NA         NA         NA       NA    
## V10.Male                               NA         NA         NA       NA    
## V11                             1.515e+10  6.103e+01  248215727   <2e-16 ***
## V12                             1.548e+11  1.518e+03  101924136   <2e-16 ***
## V13                            -4.157e+13  8.947e+04 -464636898   <2e-16 ***
## V14..                          -2.127e+14  3.449e+07   -6166348   <2e-16 ***
## V14.Cambodia                    1.804e+15  5.903e+07   30563828   <2e-16 ***
## V14.Canada                     -9.754e+14  3.623e+07  -26924014   <2e-16 ***
## V14.China                       3.139e+14  3.899e+07    8050228   <2e-16 ***
## V14.Columbia                    2.155e+15  7.535e+07   28607405   <2e-16 ***
## V14.Cuba                       -1.569e+15  3.936e+07  -39865728   <2e-16 ***
## V14.Dominican.Republic                 NA         NA         NA       NA    
## V14.Ecuador                     2.000e+14  5.830e+07    3431112   <2e-16 ***
## V14.El.Salvador                 5.352e+14  4.521e+07   11838315   <2e-16 ***
## V14.England                     2.845e+13  3.794e+07     749833   <2e-16 ***
## V14.France                     -3.013e+14  4.125e+07   -7303826   <2e-16 ***
## V14.Germany                    -3.903e+14  3.639e+07  -10723816   <2e-16 ***
## V14.Greece                     -3.609e+15  4.359e+07  -82783695   <2e-16 ***
## V14.Guatemala                          NA         NA         NA       NA    
## V14.Haiti                              NA         NA         NA       NA    
## V14.Holand.Netherlands                 NA         NA         NA       NA    
## V14.Honduras                           NA         NA         NA       NA    
## V14.Hong                        2.628e+14  4.616e+07    5693624   <2e-16 ***
## V14.Hungary                     6.213e+14  5.836e+07   10646011   <2e-16 ***
## V14.India                       3.249e+13  3.665e+07     886349   <2e-16 ***
## V14.Iran                       -3.496e+15  3.831e+07  -91256665   <2e-16 ***
## V14.Ireland                    -2.987e+15  5.141e+07  -58110019   <2e-16 ***
## V14.Italy                       3.034e+13  3.807e+07     797040   <2e-16 ***
## V14.Jamaica                     2.904e+15  7.572e+07   38349803   <2e-16 ***
## V14.Japan                       5.594e+14  3.867e+07   14465190   <2e-16 ***
## V14.Laos                        2.427e+15  7.625e+07   31826490   <2e-16 ***
## V14.Mexico                      1.998e+14  3.917e+07    5100435   <2e-16 ***
## V14.Nicaragua                          NA         NA         NA       NA    
## V14.Outlying.US.Guam.USVI.etc.         NA         NA         NA       NA    
## V14.Peru                       -1.239e+15  7.521e+07  -16478711   <2e-16 ***
## V14.Philippines                -1.491e+15  3.695e+07  -40349667   <2e-16 ***
## V14.Poland                     -2.029e+14  4.348e+07   -4666562   <2e-16 ***
## V14.Portugal                    1.810e+13  5.836e+07     310228   <2e-16 ***
## V14.Puerto.Rico                -5.417e+14  4.800e+07  -11284445   <2e-16 ***
## V14.Scotland                    2.444e+15  7.521e+07   32494881   <2e-16 ***
## V14.South                      -2.945e+14  4.099e+07   -7184583   <2e-16 ***
## V14.Taiwan                      1.731e+15  3.897e+07   44411006   <2e-16 ***
## V14.Thailand                    1.398e+13  5.903e+07     236749   <2e-16 ***
## V14.Trinadad.Tobago                    NA         NA         NA       NA    
## V14.United.States              -8.790e+14  3.373e+07  -26058180   <2e-16 ***
## V14.Vietnam                     3.345e+15  7.597e+07   44023473   <2e-16 ***
## V14.Yugoslavia                         NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1208.3  on 4916  degrees of freedom
## Residual deviance: 9587.6  on 4833  degrees of freedom
## AIC: 9755.6
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-1 model (up to 50 features);
# plot title spelling corrected ("Assisted").
vip(Adult_TDA_PC_5.50.5_n1_LrFit0,50) + ggtitle("Adult_TDA_PCA_5.50.5_n1_Lr1Fit TDA-Assisted LR")

# Predict outcome using Adult_TDA_PC_5.50.5_n1_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n1_LrFit0, newdata= adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-1 model's test-set predictions against the observed labels
ad_tda_pc_5.50.5_n1_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n1_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K     47    13
##      >50K    7369  2339
##                                           
##                Accuracy : 0.2443          
##                  95% CI : (0.2358, 0.2529)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 4e-04           
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.006338        
##             Specificity : 0.994473        
##          Pos Pred Value : 0.783333        
##          Neg Pred Value : 0.240935        
##              Prevalence : 0.759214        
##          Detection Rate : 0.004812        
##    Detection Prevalence : 0.006143        
##       Balanced Accuracy : 0.500405        
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): repeats the confusion-matrix print issued right after the object was created
ad_tda_pc_5.50.5_n1_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K     47    13
##      >50K    7369  2339
##                                           
##                Accuracy : 0.2443          
##                  95% CI : (0.2358, 0.2529)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 4e-04           
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.006338        
##             Specificity : 0.994473        
##          Pos Pred Value : 0.783333        
##          Neg Pred Value : 0.240935        
##              Prevalence : 0.759214        
##          Detection Rate : 0.004812        
##    Detection Prevalence : 0.006143        
##       Balanced Accuracy : 0.500405        
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n1_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   0.2442669943   0.0003919283   0.2357726063   0.2529141141   0.7592137592 
## AccuracyPValue  McnemarPValue 
##   1.0000000000   0.0000000000
# Overall test-set accuracy (first entry of the overall statistics vector)
ad_tda_pc_5.50.5_n1_lr_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n1_lr_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n1_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##          0.006337648          0.994472789          0.783333333 
##       Neg Pred Value            Precision               Recall 
##          0.240935311          0.783333333          0.006337648 
##                   F1           Prevalence       Detection Rate 
##          0.012573569          0.759213759          0.004811630 
## Detection Prevalence    Balanced Accuracy 
##          0.006142506          0.500405219
# Entries 5 to 7 of the by-class statistics: Precision, Recall and F1
ad_tda_pc_5.50.5_n1_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n1_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Initial Bayesian tests: non-TDA-assisted LR vs. TDA-assisted LR classifiers

### 3-fold diff: per-fold accuracy of the plain LR minus that of the node-1 LR

diff_tda_pca_5.50.5_lr_n1_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.50.5_n1_lr_fit_re
diff_tda_pca_5.50.5_lr_n1_3_fold
##     Accuracy
## 1  0.2210888
## 2 -0.1225000
## 3 -0.1199465
## Bayesian tests on the 3-fold differences

# Bayesian sign test, bounds -0.01 and 0.01

bst_tda_pca_5.50.5_lr.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n1_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.25
# Ratio of probLeft to probRight from the Bayesian sign test result

bst_tda_pca_5.50.5_lr.n1_3_fold_odds.left <-
  bst_tda_pca_5.50.5_lr.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_lr.n1_3_fold[["probRight"]]
bst_tda_pca_5.50.5_lr.n1_3_fold_odds.left
## [1] 2
# Bayesian signed-rank test on the 3-fold differences, bounds -0.01 and 0.01

bsr_tda_pca_5.50.5_lr.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n1_3_fold
## $winLeft
## [1] 0.5395667
## 
## $winRope
## [1] 0.01743333
## 
## $winRight
## [1] 0.443
# Bayesian Correlated Test
# Applied to the three fold-wise accuracy differences with arguments 0.1, -0.01, 0.01.
# NOTE(review): the second argument is presumably the correlation rho; the
# usual heuristic for k-fold cross-validation is 1/k (1/3 for the 3 folds
# used here), so confirm that 0.1 is intended.

bct_tda_pca_5.50.5_lr.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n1_3_fold
## $left
## [1] 0.4922709
## 
## $rope
## [1] 0.05347321
## 
## $right
## [1] 0.4542559
# ROPE plot for the 3-fold accuracy differences, region bounds -0.01 and 0.01
plot(
  rope(diff_tda_pca_5.50.5_lr_n1_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
# bf_tda_pca_5.50.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold))
# bf_tda_pca_5.50.5_lr.n1_3_fold

# One-sample t-test of the fold-wise differences
t.test(
  as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold)
## t = -0.062391, df = 2, p-value = 0.9559
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.4980795  0.4838410
## sample estimates:
##   mean of x 
## -0.00711925
### Test-set accuracy difference: plain LR minus node-1 TDA-assisted LR
diff_tda_pca_5.50.5_lr.n1_test <-
  lr_cf_ov_acc - ad_tda_pc_5.50.5_n1_lr_cf0_ov_acc
diff_tda_pca_5.50.5_lr.n1_test
##  Accuracy 
## 0.6083129
## Bayesian tests on the test-set difference

# Bayesian sign test, bounds -0.01 and 0.01

bst_tda_pca_5.50.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Ratio of probLeft to probRight from the Bayesian sign test result

bst_tda_pca_5.50.5_lr.n1_test_odds.left <-
  bst_tda_pca_5.50.5_lr.n1_test[["probLeft"]] /
  bst_tda_pca_5.50.5_lr.n1_test[["probRight"]]
bst_tda_pca_5.50.5_lr.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference, bounds -0.01 and 0.01

bsr_tda_pca_5.50.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1572667
## 
## $winRight
## [1] 0.8427333
# Bayesian Correlated Test
# Applied to the single test-set accuracy difference with arguments 0.1, -0.01, 0.01.
# NOTE(review): the rendered result is NA for left/rope/right — presumably
# because a spread cannot be estimated from one observation; confirm whether
# this call is meaningful for the test-set difference.

bct_tda_pca_5.50.5_lr.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot (disabled for the single test-set difference)
#plot(rope(diff_tda_pca_5.50.5_lr.n1_test,c(-0.01,0.01)))

#BayesFactor (disabled)
#bf_tda_pca_5.50.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n1_test))
#bf_tda_pca_5.50.5_lr.n1_test

#t_test (disabled)
# NOTE(review): presumably disabled because a t-test needs more than one observation — confirm
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n1_test))

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node2

# Logistic regression (glm, binomial family) on the node-2 TDA training
# subset, resampled according to fitControl and scored on Accuracy.
# The recorded run warns that glm.fit did not converge and that the fit is
# rank-deficient, so the coefficient table should be read with caution.
Adult_TDA_PC_5.50.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  method = "glm",
  family = "binomial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_PC_5.50.5_n2_LrFit0
## Generalized Linear Model 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8138, 8136, 8138 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.7168597  0.4317041
Adult_TDA_PC_5.50.5_n2_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7227139 0.4433708    Fold1
## 2 0.7221130 0.4431247    Fold2
## 3 0.7057522 0.4086167    Fold3
# Keep the per-fold Accuracy column (first column of $resample) as a
# one-column data frame for the fold-wise comparison further down
ad_tda_pc_5.50.5_n2_lr_fit_re <-
  Adult_TDA_PC_5.50.5_n2_LrFit0$resample["Accuracy"]

# Coefficient table of the final node-2 model
summary(Adult_TDA_PC_5.50.5_n2_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (11 not defined because of singularities)
##                                  Estimate Std. Error    z value Pr(>|z|)    
## (Intercept)                    -1.967e+13  2.870e+13 -6.850e-01 0.493065    
## V1                              1.336e-02  2.046e-03  6.528e+00 6.67e-11 ***
## V2..                           -4.308e+12  8.550e+12 -5.040e-01 0.614376    
## V2.Federal.gov                 -4.308e+12  8.551e+12 -5.040e-01 0.614396    
## V2.Local.gov                   -4.308e+12  8.551e+12 -5.040e-01 0.614388    
## V2.Never.worked                        NA         NA         NA       NA    
## V2.Private                     -4.308e+12  8.551e+12 -5.040e-01 0.614408    
## V2.Self.emp.inc                -4.308e+12  8.551e+12 -5.040e-01 0.614386    
## V2.Self.emp.not.inc            -4.308e+12  8.551e+12 -5.040e-01 0.614388    
## V2.State.gov                   -4.308e+12  8.550e+12 -5.040e-01 0.614372    
## V2.Without.pay                 -4.308e+12  8.551e+12 -5.040e-01 0.614382    
## V3                              1.019e-06  2.175e-07  4.687e+00 2.77e-06 ***
## V4.10th                        -6.537e-01  1.826e-01 -3.579e+00 0.000345 ***
## V4.11th                        -7.294e-01  2.044e-01 -3.568e+00 0.000359 ***
## V4.12th                        -2.521e-01  2.832e-01 -8.900e-01 0.373348    
## V4.1st.4th                     -5.050e-01  5.586e-01 -9.040e-01 0.365952    
## V4.5th.6th                     -5.680e-01  3.741e-01 -1.518e+00 0.128967    
## V4.7th.8th                     -1.287e+00  2.030e-01 -6.339e+00 2.31e-10 ***
## V4.9th                         -1.013e+00  2.999e-01 -3.376e+00 0.000734 ***
## V4.Assoc.acdm                   5.309e-03  1.222e-01  4.300e-02 0.965361    
## V4.Assoc.voc                   -4.145e-02  1.021e-01 -4.060e-01 0.684913    
## V4.Bachelors                    5.747e-01  6.873e-02  8.361e+00  < 2e-16 ***
## V4.Doctorate                    1.132e+00  1.918e-01  5.905e+00 3.53e-09 ***
## V4.HS.grad                     -3.084e-01  6.013e-02 -5.129e+00 2.91e-07 ***
## V4.Masters                      9.349e-01  1.021e-01  9.157e+00  < 2e-16 ***
## V4.Preschool                           NA         NA         NA       NA    
## V4.Prof.school                  1.073e+00  1.699e-01  6.317e+00 2.67e-10 ***
## V4.Some.college                        NA         NA         NA       NA    
## V5                                     NA         NA         NA       NA    
## V6.Divorced                     2.398e+13  3.454e+13  6.940e-01 0.487623    
## V6.Married.AF.spouse            2.398e+13  3.454e+13  6.940e-01 0.487623    
## V6.Married.civ.spouse           2.398e+13  3.454e+13  6.940e-01 0.487623    
## V6.Married.spouse.absent        4.528e+15  3.454e+13  1.311e+02  < 2e-16 ***
## V6.Never.married                2.398e+13  3.454e+13  6.940e-01 0.487623    
## V6.Separated                    2.398e+13  3.454e+13  6.940e-01 0.487623    
## V6.Widowed                      2.398e+13  3.454e+13  6.940e-01 0.487623    
## V7..                                   NA         NA         NA       NA    
## V7.Adm.clerical                 6.669e-01  1.361e-01  4.899e+00 9.63e-07 ***
## V7.Armed.Forces                -5.528e-01  1.732e+00 -3.190e-01 0.749587    
## V7.Craft.repair                 1.326e-01  9.167e-02  1.447e+00 0.148034    
## V7.Exec.managerial              9.516e-01  9.742e-02  9.768e+00  < 2e-16 ***
## V7.Farming.fishing             -6.079e-01  1.485e-01 -4.092e+00 4.27e-05 ***
## V7.Handlers.cleaners            6.106e-02  1.781e-01  3.430e-01 0.731742    
## V7.Machine.op.inspct            1.666e-01  1.202e-01  1.386e+00 0.165808    
## V7.Other.service                1.136e-02  1.794e-01  6.300e-02 0.949500    
## V7.Priv.house.serv             -2.811e+01  3.621e+05  0.000e+00 0.999938    
## V7.Prof.specialty               6.883e-01  1.073e-01  6.417e+00 1.39e-10 ***
## V7.Protective.serv              6.582e-01  1.474e-01  4.466e+00 7.97e-06 ***
## V7.Sales                        4.691e-01  1.005e-01  4.669e+00 3.03e-06 ***
## V7.Tech.support                 9.348e-01  1.447e-01  6.462e+00 1.03e-10 ***
## V7.Transport.moving                    NA         NA         NA       NA    
## V8.Husband                      4.404e-01  1.426e+00  3.090e-01 0.757500    
## V8.Not.in.family                1.221e+00  1.601e+00  7.630e-01 0.445618    
## V8.Other.relative               7.811e-01  1.527e+00  5.120e-01 0.608889    
## V8.Own.child                    1.533e+00  1.591e+00  9.630e-01 0.335406    
## V8.Unmarried                    2.370e+01  3.638e+04  1.000e-03 0.999480    
## V8.Wife                                NA         NA         NA       NA    
## V9.Amer.Indian.Eskimo          -4.932e-01  3.265e-01 -1.511e+00 0.130889    
## V9.Asian.Pac.Islander           2.948e-01  2.032e-01  1.450e+00 0.146939    
## V9.Black                        9.881e-01  1.415e-01  6.985e+00 2.84e-12 ***
## V9.Other                        4.578e-01  4.690e-01  9.760e-01 0.329059    
## V9.White                               NA         NA         NA       NA    
## V10.Female                      4.686e+00  1.511e+00  3.101e+00 0.001930 ** 
## V10.Male                               NA         NA         NA       NA    
## V11                             2.798e-04  1.449e-05  1.931e+01  < 2e-16 ***
## V12                             5.656e-04  4.669e-05  1.211e+01  < 2e-16 ***
## V13                             2.005e-02  2.014e-03  9.957e+00  < 2e-16 ***
## V14..                          -3.265e-01  6.869e-01 -4.750e-01 0.634603    
## V14.Cambodia                    1.581e+00  1.115e+00  1.418e+00 0.156332    
## V14.Canada                      3.195e-01  7.366e-01  4.340e-01 0.664429    
## V14.China                      -1.030e+00  8.133e-01 -1.267e+00 0.205153    
## V14.Columbia                   -2.191e+00  1.156e+00 -1.895e+00 0.058028 .  
## V14.Cuba                        5.253e-01  7.717e-01  6.810e-01 0.496104    
## V14.Dominican.Republic         -2.697e+01  2.279e+05  0.000e+00 0.999906    
## V14.Ecuador                    -2.249e-01  1.043e+00 -2.160e-01 0.829290    
## V14.El.Salvador                -3.510e-01  9.132e-01 -3.840e-01 0.700685    
## V14.England                     2.505e-01  7.825e-01  3.200e-01 0.748876    
## V14.France                      5.230e-01  1.010e+00  5.180e-01 0.604632    
## V14.Germany                     4.033e-01  7.343e-01  5.490e-01 0.582832    
## V14.Greece                     -1.370e+00  9.195e-01 -1.490e+00 0.136305    
## V14.Guatemala                  -1.322e+00  1.963e+00 -6.740e-01 0.500601    
## V14.Haiti                       1.385e-01  1.430e+00  9.700e-02 0.922871    
## V14.Holand.Netherlands                 NA         NA         NA       NA    
## V14.Honduras                    4.504e+15  6.711e+07  6.711e+07  < 2e-16 ***
## V14.Hong                       -1.354e-01  1.065e+00 -1.270e-01 0.898818    
## V14.Hungary                    -3.015e-02  1.194e+00 -2.500e-02 0.979847    
## V14.India                      -9.204e-01  7.529e-01 -1.222e+00 0.221573    
## V14.Iran                       -6.423e-02  8.333e-01 -7.700e-02 0.938563    
## V14.Ireland                     1.402e+00  1.331e+00  1.054e+00 0.291937    
## V14.Italy                       1.437e-01  7.628e-01  1.880e-01 0.850565    
## V14.Jamaica                    -2.473e-01  9.478e-01 -2.610e-01 0.794146    
## V14.Japan                      -4.180e-01  8.248e-01 -5.070e-01 0.612321    
## V14.Laos                        2.603e+01  4.776e+05  0.000e+00 0.999957    
## V14.Mexico                      1.362e-01  7.326e-01  1.860e-01 0.852530    
## V14.Nicaragua                  -1.396e+00  1.467e+00 -9.520e-01 0.341303    
## V14.Outlying.US.Guam.USVI.etc. -2.492e+01  2.745e+05  0.000e+00 0.999928    
## V14.Peru                        2.271e-01  1.315e+00  1.730e-01 0.862897    
## V14.Philippines                 5.721e-01  7.622e-01  7.510e-01 0.452892    
## V14.Poland                     -1.261e-01  8.095e-01 -1.560e-01 0.876255    
## V14.Portugal                   -3.059e-01  1.145e+00 -2.670e-01 0.789288    
## V14.Puerto.Rico                -4.806e-01  9.040e-01 -5.320e-01 0.594987    
## V14.Scotland                    7.373e-01  1.418e+00  5.200e-01 0.603005    
## V14.South                      -9.358e-01  8.345e-01 -1.121e+00 0.262128    
## V14.Taiwan                     -5.569e-01  8.562e-01 -6.500e-01 0.515408    
## V14.Thailand                   -6.617e-02  1.450e+00 -4.600e-02 0.963608    
## V14.Trinadad.Tobago             2.728e+01  2.628e+05  0.000e+00 0.999917    
## V14.United.States               9.008e-02  6.663e-01  1.350e-01 0.892453    
## V14.Vietnam                    -1.255e+00  1.123e+00 -1.118e+00 0.263501    
## V14.Yugoslavia                         NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 16823  on 12205  degrees of freedom
## Residual deviance: 12903  on 12108  degrees of freedom
## AIC: 13099
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-2 fit (second argument 50)
vip(Adult_TDA_PC_5.50.5_n2_LrFit0, 50) +
  ggtitle("Adult_TDA_PCA_5.50.5_n2_Lr1Fit TDA-Assited LR")

# Class predictions of the node-2 model on the held-out test data
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n2_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-2 predictions against the true test labels
ad_tda_pc_5.50.5_n2_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n2_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1723   509
##      >50K    5693  1843
##                                           
##                Accuracy : 0.3651          
##                  95% CI : (0.3555, 0.3747)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0091          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2323          
##             Specificity : 0.7836          
##          Pos Pred Value : 0.7720          
##          Neg Pred Value : 0.2446          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1764          
##    Detection Prevalence : 0.2285          
##       Balanced Accuracy : 0.5080          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n2_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1723   509
##      >50K    5693  1843
##                                           
##                Accuracy : 0.3651          
##                  95% CI : (0.3555, 0.3747)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0091          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2323          
##             Specificity : 0.7836          
##          Pos Pred Value : 0.7720          
##          Neg Pred Value : 0.2446          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1764          
##    Detection Prevalence : 0.2285          
##       Balanced Accuracy : 0.5080          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n2_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.36506962     0.00908627     0.35551219     0.37470756     0.75921376 
## AccuracyPValue  McnemarPValue 
##     1.00000000     0.00000000
# Overall test accuracy (first entry of $overall), kept for the test-set diff
ad_tda_pc_5.50.5_n2_lr_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n2_lr_cf0$overall["Accuracy"]
# Per-class statistics of the node-2 confusion matrix
ad_tda_pc_5.50.5_n2_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.2323355            0.7835884            0.7719534 
##       Neg Pred Value            Precision               Recall 
##            0.2445594            0.7719534            0.2323355 
##                   F1           Prevalence       Detection Rate 
##            0.3571725            0.7592138            0.1763923 
## Detection Prevalence    Balanced Accuracy 
##            0.2285012            0.5079620
# Precision, Recall and F1 (entries 5 to 7 of $byClass)
ad_tda_pc_5.50.5_n2_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n2_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Per-fold accuracy gap: ad_lr_fit_re minus the node-2 TDA-assisted folds
diff_tda_pca_5.50.5_lr_n2_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.50.5_n2_lr_fit_re
diff_tda_pca_5.50.5_lr_n2_3_fold
##    Accuracy
## 1 0.1284116
## 2 0.1273210
## 3 0.1438135
## Bayesian Tests 3-fold diff

# Bayesian sign test on the node-2 per-fold accuracy differences,
# called with -0.01 and 0.01 as the ROPE bounds

bst_tda_pca_5.50.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Sign-test odds for node 2 (3-fold): probLeft divided by probRight

bst_tda_pca_5.50.5_lr.n2_3_fold_odds.left <-
  bst_tda_pca_5.50.5_lr.n2_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_lr.n2_3_fold[["probRight"]]
bst_tda_pca_5.50.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the node-2 per-fold accuracy differences,
# called with -0.01 and 0.01 as the ROPE bounds

bsr_tda_pca_5.50.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009833333
## 
## $winRight
## [1] 0.9901667
# Bayesian Correlated Test
# Correlated Bayesian t-test on the three node-2 per-fold accuracy
# differences, using -0.01 / 0.01 as the ROPE bounds.
# NOTE(review): the second argument (0.1) is presumably the between-fold
# correlation rho; the common 1/k heuristic gives 0.1 for 10-fold CV, whereas
# these differences come from 3 folds — confirm the intended value.

bct_tda_pca_5.50.5_lr.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n2_3_fold
## $left
## [1] 0.0009195529
## 
## $rope
## [1] 0.000321639
## 
## $right
## [1] 0.9987588
# ROPE plot for the node-2 per-fold accuracy differences, range [-0.01, 0.01]
plot(
  rope(
    diff_tda_pca_5.50.5_lr_n2_3_fold,
    c(-0.01, 0.01)
  )
)

# Bayes factor t-test (kept disabled)
#bf_tda_pca_5.50.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold))
#bf_tda_pca_5.50.5_lr.n2_3_fold

# Frequentist one-sample t-test of the per-fold differences (default mu = 0)
t.test(
  as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold)
## t = 25.011, df = 2, p-value = 0.001595
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1102703 0.1560938
## sample estimates:
## mean of x 
##  0.133182
### Test set diff
# Test-set accuracy gap: lr_cf_ov_acc minus the node-2 TDA-assisted LR accuracy
diff_tda_pca_5.50.5_lr.n2_test <-
  lr_cf_ov_acc - ad_tda_pc_5.50.5_n2_lr_cf0_ov_acc
diff_tda_pca_5.50.5_lr.n2_test
##  Accuracy 
## 0.4875102
## Bayesian Tests Test set diff

# Bayesian sign test on the node-2 test-set accuracy difference,
# called with -0.01 and 0.01 as the ROPE bounds

bst_tda_pca_5.50.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Sign-test odds for node 2 (test set): probLeft divided by probRight

bst_tda_pca_5.50.5_lr.n2_test_odds.left <-
  bst_tda_pca_5.50.5_lr.n2_test[["probLeft"]] /
  bst_tda_pca_5.50.5_lr.n2_test[["probRight"]]
bst_tda_pca_5.50.5_lr.n2_test_odds.left
## [1] 0
# Bayesian signed-rank test on the node-2 test-set accuracy difference,
# called with -0.01 and 0.01 as the ROPE bounds

bsr_tda_pca_5.50.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1591
## 
## $winRight
## [1] 0.8409
# Bayesian Correlated Test
# Applied to the single node-2 test-set accuracy difference (ROPE bounds
# -0.01 / 0.01).
# NOTE(review): the recorded result is NA for left/rope/right; the input holds
# only one value, so presumably no spread can be estimated — confirm whether
# this test is meaningful for the test-set comparison.

bct_tda_pca_5.50.5_lr.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n2_test)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n2_test)) #bf_tda_pca_5.50.5_lr.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n2_test))

##Node3

# Logistic regression (glm, binomial family) on the node-3 TDA training
# subset, resampled according to fitControl and scored on Accuracy.
# The recorded run warns that glm.fit did not converge and that the fit is
# rank-deficient, so the coefficient table should be read with caution.
Adult_TDA_PC_5.50.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  method = "glm",
  family = "binomial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_PC_5.50.5_n3_LrFit0
## Generalized Linear Model 
## 
## 13240 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8827, 8827, 8826 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8284742  0.4265563
Adult_TDA_PC_5.50.5_n3_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8305008 0.4347173    Fold1
## 2 0.8248357 0.4140545    Fold2
## 3 0.8300861 0.4308971    Fold3
# Per-fold Accuracy column (first column of $resample) of the node-3 fit, used
# for the fold-wise comparison against ad_lr_fit_re.
# This must read the node-3 model: taking the node-2 resamples here makes the
# node-3 3-fold differences and every test built on them duplicate the node-2
# results. The recorded 3-fold outputs for node 3 further down were produced
# from the node-2 resamples and need to be regenerated.
ad_tda_pc_5.50.5_n3_lr_fit_re<-Adult_TDA_PC_5.50.5_n3_LrFit0$resample[1]

summary(Adult_TDA_PC_5.50.5_n3_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (11 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -1.180e+13  8.650e+12  -1.364 0.172639    
## V1                              2.544e-03  2.296e-03   1.108 0.267922    
## V2..                            1.180e+13  8.650e+12   1.364 0.172639    
## V2.Federal.gov                  1.180e+13  8.650e+12   1.364 0.172639    
## V2.Local.gov                    1.180e+13  8.650e+12   1.364 0.172639    
## V2.Never.worked                        NA         NA      NA       NA    
## V2.Private                      1.180e+13  8.650e+12   1.364 0.172639    
## V2.Self.emp.inc                 1.180e+13  8.650e+12   1.364 0.172639    
## V2.Self.emp.not.inc             1.180e+13  8.650e+12   1.364 0.172639    
## V2.State.gov                    1.180e+13  8.650e+12   1.364 0.172639    
## V2.Without.pay                  1.180e+13  8.650e+12   1.364 0.172639    
## V3                              1.127e-06  2.361e-07   4.773 1.81e-06 ***
## V4.10th                        -3.082e-01  1.751e-01  -1.760 0.078340 .  
## V4.11th                        -4.031e-01  1.777e-01  -2.269 0.023258 *  
## V4.12th                         1.067e-01  2.582e-01   0.413 0.679442    
## V4.1st.4th                     -9.017e-01  4.887e-01  -1.845 0.065014 .  
## V4.5th.6th                     -9.334e-01  3.349e-01  -2.787 0.005318 ** 
## V4.7th.8th                     -1.341e+00  2.545e-01  -5.269 1.37e-07 ***
## V4.9th                         -9.128e-01  2.554e-01  -3.575 0.000351 ***
## V4.Assoc.acdm                  -5.927e-01  1.449e-01  -4.092 4.28e-05 ***
## V4.Assoc.voc                   -4.086e-01  1.262e-01  -3.236 0.001210 ** 
## V4.Bachelors                   -6.358e-01  8.503e-02  -7.478 7.57e-14 ***
## V4.Doctorate                    1.530e-01  2.176e-01   0.703 0.482035    
## V4.HS.grad                     -3.751e-01  6.898e-02  -5.437 5.42e-08 ***
## V4.Masters                     -4.995e-01  1.200e-01  -4.163 3.14e-05 ***
## V4.Preschool                   -3.157e+01  6.581e+04   0.000 0.999617    
## V4.Prof.school                 -7.407e-02  1.962e-01  -0.378 0.705726    
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -5.751e-01  1.975e-01  -2.912 0.003586 ** 
## V6.Married.AF.spouse            1.453e+00  8.056e-01   1.804 0.071232 .  
## V6.Married.civ.spouse          -2.388e-01  3.406e-01  -0.701 0.483250    
## V6.Married.spouse.absent       -4.412e-01  3.077e-01  -1.434 0.151629    
## V6.Never.married               -3.763e-01  2.065e-01  -1.822 0.068456 .  
## V6.Separated                   -3.602e-01  2.592e-01  -1.389 0.164685    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 1.219e+00  1.374e-01   8.872  < 2e-16 ***
## V7.Armed.Forces                -2.519e+01  2.088e+05   0.000 0.999904    
## V7.Craft.repair                 1.752e-01  1.230e-01   1.425 0.154147    
## V7.Exec.managerial              5.016e-01  1.296e-01   3.871 0.000108 ***
## V7.Farming.fishing             -1.295e+00  2.989e-01  -4.331 1.48e-05 ***
## V7.Handlers.cleaners            4.344e-01  1.723e-01   2.522 0.011685 *  
## V7.Machine.op.inspct            6.695e-01  1.368e-01   4.895 9.84e-07 ***
## V7.Other.service                3.295e-01  1.593e-01   2.069 0.038550 *  
## V7.Priv.house.serv             -2.618e+00  7.482e+00  -0.350 0.726442    
## V7.Prof.specialty               4.234e-01  1.366e-01   3.100 0.001937 ** 
## V7.Protective.serv              2.434e-01  2.090e-01   1.164 0.244225    
## V7.Sales                        7.925e-01  1.283e-01   6.175 6.61e-10 ***
## V7.Tech.support                 1.111e+00  1.648e-01   6.740 1.58e-11 ***
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -8.944e-01  1.333e-01  -6.709 1.97e-11 ***
## V8.Not.in.family                3.686e-02  3.046e-01   0.121 0.903684    
## V8.Other.relative              -4.079e-01  2.831e-01  -1.441 0.149643    
## V8.Own.child                   -4.954e-01  2.973e-01  -1.666 0.095670 .  
## V8.Unmarried                    1.740e-01  3.193e-01   0.545 0.585776    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo           3.932e-01  2.667e-01   1.474 0.140444    
## V9.Asian.Pac.Islander           6.353e-01  2.048e-01   3.101 0.001926 ** 
## V9.Black                        9.496e-01  9.520e-02   9.975  < 2e-16 ***
## V9.Other                        4.337e-01  3.313e-01   1.309 0.190520    
## V9.White                               NA         NA      NA       NA    
## V10.Female                      1.575e+00  1.125e-01  13.996  < 2e-16 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             2.870e-04  1.397e-05  20.547  < 2e-16 ***
## V12                             2.752e-04  5.333e-05   5.161 2.46e-07 ***
## V13                             4.299e-03  2.299e-03   1.870 0.061477 .  
## V14..                          -1.261e+00  8.687e-01  -1.451 0.146647    
## V14.Cambodia                   -2.165e-02  1.122e+00  -0.019 0.984608    
## V14.Canada                     -1.081e+00  9.282e-01  -1.165 0.244126    
## V14.China                      -2.365e+00  1.014e+00  -2.333 0.019646 *  
## V14.Columbia                   -2.434e+00  1.350e+00  -1.803 0.071370 .  
## V14.Cuba                       -2.517e-01  9.402e-01  -0.268 0.788946    
## V14.Dominican.Republic         -2.448e+00  1.379e+00  -1.775 0.075862 .  
## V14.Ecuador                    -3.383e-01  1.170e+00  -0.289 0.772471    
## V14.El.Salvador                -1.134e+00  1.040e+00  -1.090 0.275655    
## V14.England                    -5.701e-01  9.346e-01  -0.610 0.541868    
## V14.France                     -8.438e-01  1.125e+00  -0.750 0.453229    
## V14.Germany                    -3.181e-01  9.164e-01  -0.347 0.728528    
## V14.Greece                     -2.236e+00  1.227e+00  -1.823 0.068329 .  
## V14.Guatemala                  -9.200e-01  1.120e+00  -0.821 0.411456    
## V14.Haiti                      -5.254e-01  1.101e+00  -0.477 0.633061    
## V14.Holand.Netherlands                 NA         NA      NA       NA    
## V14.Honduras                    2.462e+01  4.015e+05   0.000 0.999951    
## V14.Hong                       -1.924e+00  1.476e+00  -1.303 0.192448    
## V14.Hungary                    -1.522e+00  1.448e+00  -1.051 0.293204    
## V14.India                      -1.700e+00  9.754e-01  -1.743 0.081350 .  
## V14.Iran                       -1.922e+00  1.135e+00  -1.693 0.090511 .  
## V14.Ireland                    -8.795e-01  1.248e+00  -0.705 0.480868    
## V14.Italy                      -6.023e-01  9.656e-01  -0.624 0.532777    
## V14.Jamaica                    -5.346e-01  9.737e-01  -0.549 0.582947    
## V14.Japan                      -8.683e-01  1.004e+00  -0.865 0.387015    
## V14.Laos                       -2.194e+00  1.435e+00  -1.528 0.126423    
## V14.Mexico                     -1.498e+00  8.854e-01  -1.691 0.090751 .  
## V14.Nicaragua                  -1.320e+00  1.182e+00  -1.117 0.264098    
## V14.Outlying.US.Guam.USVI.etc. -2.500e+01  1.811e+05   0.000 0.999890    
## V14.Peru                       -1.708e+00  1.441e+00  -1.185 0.235994    
## V14.Philippines                -6.762e-01  9.112e-01  -0.742 0.458005    
## V14.Poland                     -9.216e-01  9.850e-01  -0.936 0.349454    
## V14.Portugal                   -1.939e+00  1.388e+00  -1.397 0.162338    
## V14.Puerto.Rico                -1.003e+00  9.656e-01  -1.038 0.299097    
## V14.Scotland                   -7.464e-01  1.355e+00  -0.551 0.581801    
## V14.South                      -1.846e+00  1.019e+00  -1.812 0.070008 .  
## V14.Taiwan                     -1.104e+00  1.055e+00  -1.047 0.295222    
## V14.Thailand                   -1.627e+00  1.500e+00  -1.085 0.277885    
## V14.Trinadad.Tobago            -9.675e-01  1.215e+00  -0.796 0.425941    
## V14.United.States              -9.622e-01  8.492e-01  -1.133 0.257193    
## V14.Vietnam                    -1.750e+00  1.071e+00  -1.634 0.102245    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 14233  on 13239  degrees of freedom
## Residual deviance: 10681  on 13142  degrees of freedom
## AIC: 10877
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-3 fit (second argument 50)
vip(Adult_TDA_PC_5.50.5_n3_LrFit0, 50) +
  ggtitle("Adult_TDA_PCA_5.50.5_n3_Lr1Fit TDA-Assited LR")

# Class predictions of the node-3 model on the held-out test data
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n3_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-3 predictions against the true test labels
ad_tda_pc_5.50.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n3_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   5130  1739
##      >50K    2286   613
##                                           
##                Accuracy : 0.5879          
##                  95% CI : (0.5781, 0.5977)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.0441         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6917          
##             Specificity : 0.2606          
##          Pos Pred Value : 0.7468          
##          Neg Pred Value : 0.2115          
##              Prevalence : 0.7592          
##          Detection Rate : 0.5252          
##    Detection Prevalence : 0.7032          
##       Balanced Accuracy : 0.4762          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n3_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   5130  1739
##      >50K    2286   613
##                                           
##                Accuracy : 0.5879          
##                  95% CI : (0.5781, 0.5977)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.0441         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6917          
##             Specificity : 0.2606          
##          Pos Pred Value : 0.7468          
##          Neg Pred Value : 0.2115          
##              Prevalence : 0.7592          
##          Detection Rate : 0.5252          
##    Detection Prevalence : 0.7032          
##       Balanced Accuracy : 0.4762          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n3_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   5.879402e-01  -4.411987e-02   5.781030e-01   5.977249e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00   7.554508e-18
# Overall test accuracy (first entry of $overall), kept for the test-set diff
ad_tda_pc_5.50.5_n3_lr_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n3_lr_cf0$overall["Accuracy"]
# Per-class statistics of the node-3 confusion matrix
ad_tda_pc_5.50.5_n3_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.6917476            0.2606293            0.7468336 
##       Neg Pred Value            Precision               Recall 
##            0.2114522            0.7468336            0.6917476 
##                   F1           Prevalence       Detection Rate 
##            0.7182359            0.7592138            0.5251843 
## Detection Prevalence    Balanced Accuracy 
##            0.7032146            0.4761884
# Precision, Recall and F1 (entries 5 to 7 of $byClass)
ad_tda_pc_5.50.5_n3_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Per-fold accuracy gap: ad_lr_fit_re minus ad_tda_pc_5.50.5_n3_lr_fit_re
diff_tda_pca_5.50.5_lr_n3_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.50.5_n3_lr_fit_re
diff_tda_pca_5.50.5_lr_n3_3_fold
##    Accuracy
## 1 0.1284116
## 2 0.1273210
## 3 0.1438135
## Bayesian Tests 3-fold diff

# Bayesian sign test on the node-3 per-fold accuracy differences,
# called with -0.01 and 0.01 as the ROPE bounds

bst_tda_pca_5.50.5_lr.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Sign-test odds for node 3 (3-fold): probLeft divided by probRight

bst_tda_pca_5.50.5_lr.n3_3_fold_odds.left <-
  bst_tda_pca_5.50.5_lr.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_lr.n3_3_fold[["probRight"]]
bst_tda_pca_5.50.5_lr.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the node-3 per-fold accuracy differences,
# called with -0.01 and 0.01 as the ROPE bounds

bsr_tda_pca_5.50.5_lr.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009366667
## 
## $winRight
## [1] 0.9906333
# Bayesian Correlated Test
# Correlated Bayesian t-test on the three node-3 per-fold accuracy
# differences, using -0.01 / 0.01 as the ROPE bounds.
# NOTE(review): the second argument (0.1) is presumably the between-fold
# correlation rho; the common 1/k heuristic gives 0.1 for 10-fold CV, whereas
# these differences come from 3 folds — confirm the intended value.

bct_tda_pca_5.50.5_lr.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n3_3_fold
## $left
## [1] 0.0009195529
## 
## $rope
## [1] 0.000321639
## 
## $right
## [1] 0.9987588
# ROPE plot for the node-3 per-fold accuracy differences, range [-0.01, 0.01]
plot(
  rope(
    diff_tda_pca_5.50.5_lr_n3_3_fold,
    c(-0.01, 0.01)
  )
)

# Bayes factor t-test (kept disabled)
#bf_tda_pca_5.50.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold))
#bf_tda_pca_5.50.5_lr.n3_3_fold

# Frequentist one-sample t-test of the per-fold differences (default mu = 0)
t.test(
  as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold)
## t = 25.011, df = 2, p-value = 0.001595
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1102703 0.1560938
## sample estimates:
## mean of x 
##  0.133182
### Difference on the held-out test set

# Single accuracy gap: lr_cf_ov_acc minus the node-3 TDA-assisted LR test
# accuracy. lr_cf_ov_acc is defined earlier in the file (presumably the
# non-TDA LR test accuracy).
diff_tda_pca_5.50.5_lr.n3_test <-
  lr_cf_ov_acc - ad_tda_pc_5.50.5_n3_lr_cf0_ov_acc
diff_tda_pca_5.50.5_lr.n3_test
##  Accuracy 
## 0.2646396

## Bayesian tests on the test-set difference (same -0.01 / 0.01 bounds).
## NOTE(review): the input is one number, so these results rest on a single
## observation -- interpret with care.

# Bayesian sign test
bst_tda_pca_5.50.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5

# Odds of "left" versus "right" from the sign test (probLeft / probRight).
bst_tda_pca_5.50.5_lr.n3_test_odds.left <-
  bst_tda_pca_5.50.5_lr.n3_test$probLeft /
  bst_tda_pca_5.50.5_lr.n3_test$probRight
bst_tda_pca_5.50.5_lr.n3_test_odds.left
## [1] 0

# Bayesian signed-rank test
bsr_tda_pca_5.50.5_lr.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1563
## 
## $winRight
## [1] 0.8437

# Bayesian correlated t-test. All three regions come back NA (see below);
# presumably a spread cannot be estimated from one value -- TODO confirm.
bct_tda_pca_5.50.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA

# ROPE plot (left disabled)
# plot(rope(diff_tda_pca_5.50.5_lr.n3_test))

# Bayes factor (left disabled)
# bf_tda_pca_5.50.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n3_test))
# bf_tda_pca_5.50.5_lr.n3_test

# t-test (left disabled)
# t.test(as.matrix(diff_tda_pca_5.50.5_lr.n3_test))

## Node 4

# Logistic regression (method 'glm', binomial family) fitted to
# tda.m_adult_5.50.5.n4.vec with every other column as a predictor. Models are
# scored by accuracy under the resampling scheme in fitControl, which the
# printout below reports as 3-fold cross-validation.
Adult_TDA_PC_5.50.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  method = 'glm',
  family = 'binomial',
  metric = 'Accuracy',
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# NOTE(review): the non-convergence and rank-deficiency warnings above suggest
# separation and/or collinear predictors; treat the coefficients with caution.

# Resampling summary of the fitted model.
Adult_TDA_PC_5.50.5_n4_LrFit0
## Generalized Linear Model 
## 
## 16700 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11134, 11133, 11133 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8176725  0.2025869

# Per-fold metrics.
Adult_TDA_PC_5.50.5_n4_LrFit0$resample
##    Accuracy      Kappa Resample
## 1 0.9482573 0.23648689    Fold1
## 2 0.5523621 0.07866163    Fold2
## 3 0.9523981 0.29261226    Fold3

# Keep the per-fold accuracy as a one-column data frame (Accuracy is the first
# column of `resample`); the fold-difference tests further down subtract it.
ad_tda_pc_5.50.5_n4_lr_fit_re <-
  Adult_TDA_PC_5.50.5_n4_LrFit0$resample["Accuracy"]

# Coefficient table of the final model.
summary(Adult_TDA_PC_5.50.5_n4_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     5.026e+12  1.131e+13   0.445 0.656642    
## V1                              2.109e-02  3.735e-03   5.647 1.63e-08 ***
## V2..                           -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Federal.gov                 -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Local.gov                   -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Never.worked                -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Private                     -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Self.emp.inc                -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Self.emp.not.inc            -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.State.gov                   -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Without.pay                 -5.026e+12  1.131e+13  -0.445 0.656642    
## V3                              7.685e-07  3.631e-07   2.116 0.034327 *  
## V4.10th                        -1.539e+00  4.888e-01  -3.148 0.001642 ** 
## V4.11th                        -3.349e-01  3.036e-01  -1.103 0.270076    
## V4.12th                        -8.699e-01  4.911e-01  -1.771 0.076505 .  
## V4.1st.4th                     -2.287e+01  2.637e+04  -0.001 0.999308    
## V4.5th.6th                     -9.827e-01  6.698e-01  -1.467 0.142306    
## V4.7th.8th                     -9.913e-01  4.586e-01  -2.162 0.030647 *  
## V4.9th                         -3.022e-01  4.128e-01  -0.732 0.464162    
## V4.Assoc.acdm                  -1.521e-02  1.939e-01  -0.078 0.937472    
## V4.Assoc.voc                   -2.068e-02  1.890e-01  -0.109 0.912866    
## V4.Bachelors                    3.228e-01  1.220e-01   2.646 0.008142 ** 
## V4.Doctorate                    1.209e+00  3.656e-01   3.307 0.000943 ***
## V4.HS.grad                     -4.158e-01  1.105e-01  -3.763 0.000168 ***
## V4.Masters                      4.200e-01  1.832e-01   2.292 0.021895 *  
## V4.Preschool                   -2.166e+02  1.119e+07   0.000 0.999985    
## V4.Prof.school                  5.598e-01  3.690e-01   1.517 0.129205    
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                     1.284e-02  1.847e-01   0.070 0.944562    
## V6.Married.AF.spouse            3.113e+00  7.265e-01   4.285 1.83e-05 ***
## V6.Married.civ.spouse           1.928e+00  4.073e-01   4.734 2.20e-06 ***
## V6.Married.spouse.absent        4.426e-02  3.222e-01   0.137 0.890729    
## V6.Never.married               -2.178e-01  2.053e-01  -1.061 0.288801    
## V6.Separated                   -3.133e-01  2.699e-01  -1.161 0.245690    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 5.661e-02  2.573e-01   0.220 0.825873    
## V7.Armed.Forces                -2.308e+01  1.302e+05   0.000 0.999859    
## V7.Craft.repair                 1.553e-01  2.726e-01   0.570 0.568930    
## V7.Exec.managerial              2.120e-01  2.630e-01   0.806 0.420201    
## V7.Farming.fishing             -2.666e+00  1.019e+00  -2.615 0.008920 ** 
## V7.Handlers.cleaners           -7.790e-01  4.203e-01  -1.853 0.063838 .  
## V7.Machine.op.inspct           -7.407e-01  3.248e-01  -2.280 0.022592 *  
## V7.Other.service               -5.282e-01  2.812e-01  -1.878 0.060362 .  
## V7.Priv.house.serv             -4.245e+00  2.429e+00  -1.748 0.080495 .  
## V7.Prof.specialty               3.726e-02  2.693e-01   0.138 0.889934    
## V7.Protective.serv              7.033e-01  3.679e-01   1.912 0.055919 .  
## V7.Sales                        4.649e-02  2.667e-01   0.174 0.861630    
## V7.Tech.support                 2.779e-01  3.017e-01   0.921 0.357044    
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                      4.241e+02  3.242e+05   0.001 0.998956    
## V8.Not.in.family               -1.275e-01  3.771e-01  -0.338 0.735328    
## V8.Other.relative              -1.607e+00  3.870e-01  -4.153 3.28e-05 ***
## V8.Own.child                   -1.321e+00  3.610e-01  -3.658 0.000254 ***
## V8.Unmarried                   -1.855e-01  3.874e-01  -0.479 0.632133    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo           2.881e-01  3.548e-01   0.812 0.416757    
## V9.Asian.Pac.Islander           4.901e-01  2.870e-01   1.707 0.087742 .  
## V9.Black                        1.134e-01  1.326e-01   0.855 0.392497    
## V9.Other                        6.152e-02  4.913e-01   0.125 0.900359    
## V9.White                               NA         NA      NA       NA    
## V10.Female                      2.783e-01  1.074e-01   2.590 0.009585 ** 
## V10.Male                               NA         NA      NA       NA    
## V11                             3.618e-04  1.712e-05  21.131  < 2e-16 ***
## V12                             3.504e-04  9.101e-05   3.850 0.000118 ***
## V13                             2.382e-02  3.499e-03   6.809 9.82e-12 ***
## V14..                          -2.180e+00  1.310e+00  -1.665 0.095997 .  
## V14.Cambodia                   -2.357e+01  4.374e+04  -0.001 0.999570    
## V14.Canada                     -2.264e+00  1.406e+00  -1.610 0.107299    
## V14.China                      -2.090e+00  1.447e+00  -1.444 0.148672    
## V14.Columbia                   -2.527e+01  5.220e+04   0.000 0.999614    
## V14.Cuba                       -2.811e+00  1.498e+00  -1.877 0.060565 .  
## V14.Dominican.Republic         -2.739e+00  1.671e+00  -1.639 0.101231    
## V14.Ecuador                    -2.487e+01  8.388e+04   0.000 0.999763    
## V14.El.Salvador                -2.753e+00  1.689e+00  -1.630 0.103163    
## V14.England                    -2.255e+00  1.407e+00  -1.603 0.108979    
## V14.France                     -2.673e+00  1.776e+00  -1.505 0.132281    
## V14.Germany                    -2.223e+00  1.377e+00  -1.614 0.106540    
## V14.Greece                     -1.724e+00  1.752e+00  -0.984 0.325093    
## V14.Guatemala                  -4.375e-01  1.506e+00  -0.291 0.771422    
## V14.Haiti                      -2.640e+00  1.704e+00  -1.549 0.121412    
## V14.Holand.Netherlands         -2.397e+01  3.370e+05   0.000 0.999943    
## V14.Honduras                   -2.508e+01  1.011e+05   0.000 0.999802    
## V14.Hong                       -2.605e+01  8.869e+04   0.000 0.999766    
## V14.Hungary                    -1.846e+00  1.721e+00  -1.073 0.283395    
## V14.India                      -2.515e+00  1.526e+00  -1.648 0.099286 .  
## V14.Iran                       -2.600e+01  8.747e+04   0.000 0.999763    
## V14.Ireland                    -1.923e+00  1.673e+00  -1.150 0.250346    
## V14.Italy                      -1.033e+00  1.412e+00  -0.732 0.464434    
## V14.Jamaica                    -1.851e+00  1.526e+00  -1.212 0.225324    
## V14.Japan                      -1.023e+00  1.404e+00  -0.729 0.466262    
## V14.Laos                       -2.286e+00  1.742e+00  -1.312 0.189437    
## V14.Mexico                     -2.746e+00  1.369e+00  -2.006 0.044837 *  
## V14.Nicaragua                  -1.642e+00  1.723e+00  -0.953 0.340633    
## V14.Outlying.US.Guam.USVI.etc. -2.619e+01  9.667e+04   0.000 0.999784    
## V14.Peru                       -2.527e+01  7.152e+04   0.000 0.999718    
## V14.Philippines                -2.303e+00  1.370e+00  -1.681 0.092735 .  
## V14.Poland                     -2.459e+00  1.651e+00  -1.489 0.136359    
## V14.Portugal                   -1.331e+00  1.537e+00  -0.866 0.386319    
## V14.Puerto.Rico                -2.014e+00  1.384e+00  -1.455 0.145592    
## V14.Scotland                   -2.578e+00  1.821e+00  -1.416 0.156909    
## V14.South                      -3.242e+00  1.538e+00  -2.108 0.035001 *  
## V14.Taiwan                     -1.898e+00  1.527e+00  -1.243 0.213856    
## V14.Thailand                   -2.681e+01  1.114e+05   0.000 0.999808    
## V14.Trinadad.Tobago            -2.591e+01  1.070e+05   0.000 0.999807    
## V14.United.States              -2.274e+00  1.281e+00  -1.776 0.075762 .  
## V14.Vietnam                    -2.216e+00  1.562e+00  -1.419 0.155991    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance:   7122.1  on 16699  degrees of freedom
## Residual deviance: 130876.7  on 16600  degrees of freedom
## AIC: 131077
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-4 model, showing up to 50 features.
# The title previously misspelled "Assisted".
vip(Adult_TDA_PC_5.50.5_n4_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.50.5_n4_Lr1Fit TDA-Assisted LR")

# Predict the outcome for the test data with the model trained on node 4.
pred0 <- predict(Adult_TDA_PC_5.50.5_n4_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the observed test labels, used
# to assess model performance on the test data.
ad_tda_pc_5.50.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   5233   442
##      >50K    2183  1910
##                                         
##                Accuracy : 0.7313        
##                  95% CI : (0.7224, 0.74)
##     No Information Rate : 0.7592        
##     P-Value [Acc > NIR] : 1             
##                                         
##                   Kappa : 0.4133        
##                                         
##  Mcnemar's Test P-Value : <2e-16        
##                                         
##             Sensitivity : 0.7056        
##             Specificity : 0.8121        
##          Pos Pred Value : 0.9221        
##          Neg Pred Value : 0.4667        
##              Prevalence : 0.7592        
##          Detection Rate : 0.5357        
##    Detection Prevalence : 0.5810        
##       Balanced Accuracy : 0.7589        
##                                         
##        'Positive' Class :  <=50K        
## 
# Headline statistics of the node-4 test-set confusion matrix. The matrix
# itself is printed once, immediately after confusionMatrix() builds it, so it
# is not printed a second time here.
ad_tda_pc_5.50.5_n4_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.312654e-01   4.132667e-01   7.223549e-01   7.400377e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  8.301493e-253
# Select by name rather than by position so the extraction cannot silently
# pick a different statistic if the element order ever changes.
ad_tda_pc_5.50.5_n4_lr_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n4_lr_cf0$overall["Accuracy"]

# Class-level statistics.
ad_tda_pc_5.50.5_n4_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.7056365            0.8120748            0.9221145 
##       Neg Pred Value            Precision               Recall 
##            0.4666504            0.9221145            0.7056365 
##                   F1           Prevalence       Detection Rate 
##            0.7994806            0.7592138            0.5357289 
## Detection Prevalence    Balanced Accuracy 
##            0.5809787            0.7588556
# Precision, Recall and F1 (entries 5 to 7 above), again selected by name.
ad_tda_pc_5.50.5_n4_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n4_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Bayesian comparison: non-TDA-assisted LR vs. node-4 TDA-assisted LR

### Differences over the 3 cross-validation folds

# Fold-by-fold accuracy gap, computed as ad_lr_fit_re minus the node-4 TDA fit.
# ad_lr_fit_re is defined earlier in the file; per this section's heading it
# presumably holds the non-TDA LR fold accuracies. A positive entry therefore
# means the non-TDA model scored higher on that fold.
diff_tda_pca_5.50.5_lr_n4_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.50.5_n4_lr_fit_re
diff_tda_pca_5.50.5_lr_n4_3_fold
##      Accuracy
## 1 -0.09713183
## 2  0.29707193
## 3 -0.10283238

## Bayesian tests on the fold differences. Every test below is given -0.01 and
## 0.01, the same interval the ROPE plot uses.

# Bayesian sign test
bst_tda_pca_5.50.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n4_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.25

# Odds of "left" versus "right" from the sign test (probLeft / probRight).
bst_tda_pca_5.50.5_lr.n4_3_fold_odds.left <-
  bst_tda_pca_5.50.5_lr.n4_3_fold$probLeft /
  bst_tda_pca_5.50.5_lr.n4_3_fold$probRight
bst_tda_pca_5.50.5_lr.n4_3_fold_odds.left
## [1] 2

# Bayesian signed-rank test
bsr_tda_pca_5.50.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n4_3_fold
## $winLeft
## [1] 0.5369333
## 
## $winRope
## [1] 0.0158
## 
## $winRight
## [1] 0.4472667

# Bayesian correlated t-test
# NOTE(review): the second argument (0.1) is presumably the fold correlation
# rho. For k-fold CV the customary value is 1/k (1/3 for these 3 folds), while
# 0.1 corresponds to 10 folds -- confirm the intended value.
bct_tda_pca_5.50.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n4_3_fold
## $left
## [1] 0.4038195
## 
## $rope
## [1] 0.0447096
## 
## $right
## [1] 0.5514709

# ROPE plot for the same interval
plot(rope(diff_tda_pca_5.50.5_lr_n4_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (left disabled)
# bf_tda_pca_5.50.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold))
# bf_tda_pca_5.50.5_lr.n4_3_fold

# Frequentist one-sample t-test of the fold differences against zero
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold)
## t = 0.24455, df = 2, p-value = 0.8296
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.5371367  0.6018751
## sample estimates:
##  mean of x 
## 0.03236924
### Difference on the held-out test set

# Single accuracy gap: lr_cf_ov_acc minus the node-4 TDA-assisted LR test
# accuracy. lr_cf_ov_acc is defined earlier in the file (presumably the
# non-TDA LR test accuracy).
diff_tda_pca_5.50.5_lr.n4_test <-
  lr_cf_ov_acc - ad_tda_pc_5.50.5_n4_lr_cf0_ov_acc
diff_tda_pca_5.50.5_lr.n4_test
##  Accuracy 
## 0.1213145

## Bayesian tests on the test-set difference (same -0.01 / 0.01 bounds).
## NOTE(review): the input is one number, so these results rest on a single
## observation -- interpret with care.

# Bayesian sign test
bst_tda_pca_5.50.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5

# Odds of "left" versus "right" from the sign test (probLeft / probRight).
bst_tda_pca_5.50.5_lr.n4_test_odds.left <-
  bst_tda_pca_5.50.5_lr.n4_test$probLeft /
  bst_tda_pca_5.50.5_lr.n4_test$probRight
bst_tda_pca_5.50.5_lr.n4_test_odds.left
## [1] 0

# Bayesian signed-rank test
bsr_tda_pca_5.50.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1547333
## 
## $winRight
## [1] 0.8452667

# Bayesian correlated t-test. All three regions come back NA (see below);
# presumably a spread cannot be estimated from one value -- TODO confirm.
bct_tda_pca_5.50.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA

# ROPE plot (left disabled)
# plot(rope(diff_tda_pca_5.50.5_lr.n4_test))

# Bayes factor (left disabled)
# bf_tda_pca_5.50.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n4_test))
# bf_tda_pca_5.50.5_lr.n4_test

# t-test (left disabled)
# t.test(as.matrix(diff_tda_pca_5.50.5_lr.n4_test))

## Node 5

# Logistic regression (method 'glm', binomial family) fitted to
# tda.m_adult_5.50.5.n5.vec with every other column as a predictor. Models are
# scored by accuracy under the resampling scheme in fitControl, which the
# printout below reports as 3-fold cross-validation.
Adult_TDA_PC_5.50.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  method = 'glm',
  family = 'binomial',
  metric = 'Accuracy',
  trControl = fitControl
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# NOTE(review): the non-convergence and rank-deficiency warnings above suggest
# separation and/or collinear predictors; treat the coefficients with caution.

# Resampling summary of the fitted model.
# NOTE(review): accuracy is high while Kappa is close to zero -- check the
# class balance inside this node before reading much into the accuracy.
Adult_TDA_PC_5.50.5_n5_LrFit0
## Generalized Linear Model 
## 
## 14404 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9604, 9602, 9602 
## Resampling results:
## 
##   Accuracy   Kappa     
##   0.9684156  0.01281264

# Per-fold metrics.
Adult_TDA_PC_5.50.5_n5_LrFit0$resample
##    Accuracy         Kappa Resample
## 1 0.9975000 -0.0009383797    Fold1
## 2 0.9162849 -0.0040778877    Fold2
## 3 0.9914619  0.0434541797    Fold3

# Keep the per-fold accuracy as a one-column data frame (Accuracy is the first
# column of `resample`); the fold-difference tests further down subtract it.
ad_tda_pc_5.50.5_n5_lr_fit_re <-
  Adult_TDA_PC_5.50.5_n5_LrFit0$resample["Accuracy"]

# Coefficient table of the final model.
summary(Adult_TDA_PC_5.50.5_n5_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (11 not defined because of singularities)
##                                  Estimate Std. Error    z value Pr(>|z|)    
## (Intercept)                    -2.162e+15  4.232e+07  -51101694   <2e-16 ***
## V1                              2.376e+12  6.085e+04   39044136   <2e-16 ***
## V2..                           -2.941e+15  2.417e+07 -121660805   <2e-16 ***
## V2.Federal.gov                 -1.699e+14  2.417e+07   -7030029   <2e-16 ***
## V2.Local.gov                   -2.387e+15  2.400e+07  -99452186   <2e-16 ***
## V2.Never.worked                -5.359e+14  3.500e+07  -15312477   <2e-16 ***
## V2.Private                     -1.711e+15  2.386e+07  -71711904   <2e-16 ***
## V2.Self.emp.inc                -1.060e+15  2.524e+07  -42001880   <2e-16 ***
## V2.Self.emp.not.inc            -2.324e+15  2.413e+07  -96314772   <2e-16 ***
## V2.State.gov                   -2.249e+15  2.405e+07  -93530750   <2e-16 ***
## V2.Without.pay                         NA         NA         NA       NA    
## V3                              1.936e+08  5.347e+00   36197956   <2e-16 ***
## V4.10th                         8.384e+14  3.033e+06  276394991   <2e-16 ***
## V4.11th                        -2.572e+14  2.616e+06  -98302261   <2e-16 ***
## V4.12th                         1.142e+15  3.992e+06  285991075   <2e-16 ***
## V4.1st.4th                      4.872e+14  7.471e+06   65209586   <2e-16 ***
## V4.5th.6th                      4.300e+14  5.646e+06   76171359   <2e-16 ***
## V4.7th.8th                     -2.477e+14  4.295e+06  -57658375   <2e-16 ***
## V4.9th                          8.353e+14  4.195e+06  199139001   <2e-16 ***
## V4.Assoc.acdm                   1.640e+15  3.285e+06  499144179   <2e-16 ***
## V4.Assoc.voc                    1.068e+13  3.097e+06    3446569   <2e-16 ***
## V4.Bachelors                    1.544e+15  2.227e+06  693399248   <2e-16 ***
## V4.Doctorate                    1.841e+15  2.545e+07   72331489   <2e-16 ***
## V4.HS.grad                     -2.490e+14  1.468e+06 -169661737   <2e-16 ***
## V4.Masters                     -6.746e+14  4.802e+06 -140470411   <2e-16 ***
## V4.Preschool                    5.415e+14  1.147e+07   47195269   <2e-16 ***
## V4.Prof.school                  1.703e+15  1.519e+07  112129727   <2e-16 ***
## V4.Some.college                        NA         NA         NA       NA    
## V5                                     NA         NA         NA       NA    
## V6.Divorced                     4.259e+14  2.874e+06  148178073   <2e-16 ***
## V6.Married.AF.spouse           -1.543e+14  2.463e+07   -6265107   <2e-16 ***
## V6.Married.civ.spouse           1.217e+15  7.595e+06  160187500   <2e-16 ***
## V6.Married.spouse.absent        1.506e+13  4.792e+06    3142987   <2e-16 ***
## V6.Never.married                4.063e+14  3.141e+06  129339448   <2e-16 ***
## V6.Separated                    3.580e+14  3.572e+06  100204898   <2e-16 ***
## V6.Widowed                             NA         NA         NA       NA    
## V7..                                   NA         NA         NA       NA    
## V7.Adm.clerical                -1.017e+15  3.743e+06 -271623876   <2e-16 ***
## V7.Armed.Forces                -1.176e+15  3.396e+07  -34625127   <2e-16 ***
## V7.Craft.repair                 6.660e+14  4.032e+06  165173011   <2e-16 ***
## V7.Exec.managerial             -2.748e+14  4.233e+06  -64926351   <2e-16 ***
## V7.Farming.fishing              7.219e+14  5.332e+06  135380403   <2e-16 ***
## V7.Handlers.cleaners            7.555e+14  4.069e+06  185659303   <2e-16 ***
## V7.Machine.op.inspct            5.763e+13  4.033e+06   14288309   <2e-16 ***
## V7.Other.service                3.967e+13  3.697e+06   10731345   <2e-16 ***
## V7.Priv.house.serv             -7.565e+14  6.687e+06 -113133292   <2e-16 ***
## V7.Prof.specialty              -2.301e+14  4.284e+06  -53698560   <2e-16 ***
## V7.Protective.serv              5.842e+13  6.468e+06    9032610   <2e-16 ***
## V7.Sales                        1.079e+14  3.869e+06   27884603   <2e-16 ***
## V7.Tech.support                -1.244e+15  4.879e+06 -255023701   <2e-16 ***
## V7.Transport.moving                    NA         NA         NA       NA    
## V8.Husband                             NA         NA         NA       NA    
## V8.Not.in.family                1.050e+15  7.815e+06  134312655   <2e-16 ***
## V8.Other.relative               6.155e+14  7.766e+06   79260148   <2e-16 ***
## V8.Own.child                    6.824e+14  7.775e+06   87770147   <2e-16 ***
## V8.Unmarried                    1.163e+15  7.891e+06  147418645   <2e-16 ***
## V8.Wife                                NA         NA         NA       NA    
## V9.Amer.Indian.Eskimo           7.955e+14  5.009e+06  158791158   <2e-16 ***
## V9.Asian.Pac.Islander           5.606e+14  4.575e+06  122534488   <2e-16 ***
## V9.Black                        5.857e+13  1.646e+06   35588559   <2e-16 ***
## V9.Other                       -1.631e+14  5.283e+06  -30878904   <2e-16 ***
## V9.White                               NA         NA         NA       NA    
## V10.Female                      1.778e+15  1.416e+06 1256171611   <2e-16 ***
## V10.Male                               NA         NA         NA       NA    
## V11                             2.519e+10  6.411e+02   39287154   <2e-16 ***
## V12                             6.275e+11  2.151e+03  291765144   <2e-16 ***
## V13                            -4.315e+11  5.322e+04   -8106976   <2e-16 ***
## V14..                          -1.353e+15  3.389e+07  -39929374   <2e-16 ***
## V14.Cambodia                   -1.630e+15  4.235e+07  -38482872   <2e-16 ***
## V14.Canada                     -1.194e+15  3.516e+07  -33962427   <2e-16 ***
## V14.China                      -3.368e+15  3.680e+07  -91523027   <2e-16 ***
## V14.Columbia                   -9.336e+14  3.529e+07  -26452174   <2e-16 ***
## V14.Cuba                       -1.658e+15  3.517e+07  -47133098   <2e-16 ***
## V14.Dominican.Republic         -7.311e+14  3.493e+07  -20929588   <2e-16 ***
## V14.Ecuador                    -8.851e+14  3.764e+07  -23516833   <2e-16 ***
## V14.El.Salvador                -1.400e+15  3.455e+07  -40520006   <2e-16 ***
## V14.England                    -9.049e+14  3.546e+07  -25518749   <2e-16 ***
## V14.France                     -1.156e+15  4.041e+07  -28616005   <2e-16 ***
## V14.Germany                    -1.720e+15  3.467e+07  -49613234   <2e-16 ***
## V14.Greece                     -1.254e+15  4.509e+07  -27813651   <2e-16 ***
## V14.Guatemala                  -1.687e+15  3.503e+07  -48141120   <2e-16 ***
## V14.Haiti                      -1.119e+15  3.557e+07  -31471154   <2e-16 ***
## V14.Holand.Netherlands         -4.936e+15  7.528e+07  -65570303   <2e-16 ***
## V14.Honduras                   -1.554e+15  3.884e+07  -40018740   <2e-16 ***
## V14.Hong                       -1.433e+15  4.054e+07  -35339499   <2e-16 ***
## V14.Hungary                    -1.455e+14  4.509e+07   -3227333   <2e-16 ***
## V14.India                      -1.478e+15  3.687e+07  -40085617   <2e-16 ***
## V14.Iran                       -1.369e+14  4.335e+07   -3157781   <2e-16 ***
## V14.Ireland                    -2.012e+15  3.844e+07  -52329979   <2e-16 ***
## V14.Italy                      -1.160e+15  3.683e+07  -31487069   <2e-16 ***
## V14.Jamaica                    -8.793e+14  3.479e+07  -25275953   <2e-16 ***
## V14.Japan                      -1.230e+15  3.659e+07  -33630842   <2e-16 ***
## V14.Laos                       -1.349e+15  4.002e+07  -33707314   <2e-16 ***
## V14.Mexico                     -1.540e+15  3.383e+07  -45503793   <2e-16 ***
## V14.Nicaragua                  -4.598e+14  3.682e+07  -12488819   <2e-16 ***
## V14.Outlying.US.Guam.USVI.etc. -1.919e+15  3.923e+07  -48921853   <2e-16 ***
## V14.Peru                       -3.046e+15  3.667e+07  -83061823   <2e-16 ***
## V14.Philippines                -3.528e+15  3.460e+07 -101957990   <2e-16 ***
## V14.Poland                     -3.354e+15  3.630e+07  -92405163   <2e-16 ***
## V14.Portugal                   -1.466e+15  3.760e+07  -38980594   <2e-16 ***
## V14.Puerto.Rico                -7.184e+14  3.454e+07  -20800085   <2e-16 ***
## V14.Scotland                   -3.348e+14  4.508e+07   -7426195   <2e-16 ***
## V14.South                      -1.565e+15  3.570e+07  -43840023   <2e-16 ***
## V14.Taiwan                     -3.826e+15  3.745e+07 -102162557   <2e-16 ***
## V14.Thailand                   -2.314e+15  3.946e+07  -58644892   <2e-16 ***
## V14.Trinadad.Tobago            -1.090e+15  3.926e+07  -27767055   <2e-16 ***
## V14.United.States              -2.224e+15  3.360e+07  -66189140   <2e-16 ***
## V14.Vietnam                    -1.710e+15  3.544e+07  -48267228   <2e-16 ***
## V14.Yugoslavia                         NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance:  418.0  on 14403  degrees of freedom
## Residual deviance: 5550.7  on 14306  degrees of freedom
## AIC: 5746.7
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-5 model, showing up to 50 features.
# The title previously misspelled "Assisted".
vip(Adult_TDA_PC_5.50.5_n5_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.50.5_n5_Lr1Fit TDA-Assisted LR")

# Predict the outcome for the test data with the model trained on node 5.
pred0 <- predict(Adult_TDA_PC_5.50.5_n5_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the observed test labels, used
# to assess model performance on the test data.
ad_tda_pc_5.50.5_n5_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7398  2338
##      >50K      18    14
##                                           
##                Accuracy : 0.7588          
##                  95% CI : (0.7502, 0.7673)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5432          
##                                           
##                   Kappa : 0.0053          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.997573        
##             Specificity : 0.005952        
##          Pos Pred Value : 0.759860        
##          Neg Pred Value : 0.437500        
##              Prevalence : 0.759214        
##          Detection Rate : 0.757371        
##    Detection Prevalence : 0.996724        
##       Balanced Accuracy : 0.501763        
##                                           
##        'Positive' Class :  <=50K          
## 
# Headline statistics of the node-5 test-set confusion matrix. The matrix
# itself is printed once, immediately after confusionMatrix() builds it, so it
# is not printed a second time here.
ad_tda_pc_5.50.5_n5_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.758804259    0.005315264    0.750192757    0.767261189    0.759213759 
## AccuracyPValue  McnemarPValue 
##    0.543204853    0.000000000
# Select by name rather than by position so the extraction cannot silently
# pick a different statistic if the element order ever changes.
ad_tda_pc_5.50.5_n5_lr_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n5_lr_cf0$overall["Accuracy"]

# Class-level statistics.
ad_tda_pc_5.50.5_n5_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##          0.997572816          0.005952381          0.759860312 
##       Neg Pred Value            Precision               Recall 
##          0.437500000          0.759860312          0.997572816 
##                   F1           Prevalence       Detection Rate 
##          0.862639925          0.759213759          0.757371007 
## Detection Prevalence    Balanced Accuracy 
##          0.996723997          0.501762598
# Precision, Recall and F1 (entries 5 to 7 above), again selected by name.
ad_tda_pc_5.50.5_n5_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n5_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Bayesian comparison: non-TDA-assisted LR vs. node-5 TDA-assisted LR

### Differences over the 3 cross-validation folds

# Fold-by-fold accuracy gap, computed as ad_lr_fit_re minus the node-5 TDA fit.
# ad_lr_fit_re is defined earlier in the file; per this section's heading it
# presumably holds the non-TDA LR fold accuracies. A negative entry therefore
# means the node-5 TDA-assisted model scored higher on that fold.
diff_tda_pca_5.50.5_lr_n5_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.50.5_n5_lr_fit_re
diff_tda_pca_5.50.5_lr_n5_3_fold
##      Accuracy
## 1 -0.14637456
## 2 -0.06685082
## 3 -0.14189622

## Bayesian tests on the fold differences. Every test below is given -0.01 and
## 0.01, the same interval the ROPE plot uses.

# Bayesian sign test
bst_tda_pca_5.50.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0

# Odds of "left" versus "right" from the sign test (probLeft / probRight).
# probRight is 0 here, so the ratio evaluates to Inf.
bst_tda_pca_5.50.5_lr.n5_3_fold_odds.left <-
  bst_tda_pca_5.50.5_lr.n5_3_fold$probLeft /
  bst_tda_pca_5.50.5_lr.n5_3_fold$probRight
bst_tda_pca_5.50.5_lr.n5_3_fold_odds.left
## [1] Inf

# Bayesian signed-rank test
bsr_tda_pca_5.50.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n5_3_fold
## $winLeft
## [1] 0.9919333
## 
## $winRope
## [1] 0.008066667
## 
## $winRight
## [1] 0

# Bayesian correlated t-test
# NOTE(review): the second argument (0.1) is presumably the fold correlation
# rho. For k-fold CV the customary value is 1/k (1/3 for these 3 folds), while
# 0.1 corresponds to 10 folds -- confirm the intended value.
bct_tda_pca_5.50.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n5_3_fold
## $left
## [1] 0.9660373
## 
## $rope
## [1] 0.009042925
## 
## $right
## [1] 0.02491979

# ROPE plot for the same interval
plot(rope(diff_tda_pca_5.50.5_lr_n5_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (left disabled)
# bf_tda_pca_5.50.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold))
# bf_tda_pca_5.50.5_lr.n5_3_fold

# Frequentist one-sample t-test of the fold differences against zero
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold)
## t = -4.5892, df = 2, p-value = 0.04435
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2293562 -0.0073915
## sample estimates:
##  mean of x 
## -0.1183739
### Test set diff
# Test-set accuracy difference: non-TDA LR minus TDA-assisted LR.
# This is a single number, so every test below runs on one observation.
diff_tda_pca_5.50.5_lr.n5_test <- lr_cf_ov_acc - ad_tda_pc_5.50.5_n5_lr_cf0_ov_acc
print(diff_tda_pca_5.50.5_lr.n5_test)
##   Accuracy 
## 0.09377559
## Bayesian Tests Test set diff

# Bayesian sign test with a ROPE of [-0.01, 0.01]

bst_tda_pca_5.50.5_lr.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n5_test), -0.01, 0.01
)
print(bst_tda_pca_5.50.5_lr.n5_test)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test

bst_tda_pca_5.50.5_lr.n5_test_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n5_test, probLeft / probRight
)
print(bst_tda_pca_5.50.5_lr.n5_test_odds.left)
## [1] 0
# Bayesian signed-rank test, same ROPE

bsr_tda_pca_5.50.5_lr.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n5_test), -0.01, 0.01
)
print(bsr_tda_pca_5.50.5_lr.n5_test)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1584333
## 
## $winRight
## [1] 0.8415667
# Bayesian correlated t-test, same ROPE.
# The recorded result is NA for left/rope/right, presumably because one observation
# gives no variance estimate -- TODO confirm.

bct_tda_pca_5.50.5_lr.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr.n5_test), 0.1, -0.01, 0.01
)
print(bct_tda_pca_5.50.5_lr.n5_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n5_test)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n5_test)) #bf_tda_pca_5.50.5_lr.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n5_test))


##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node1


# Fit a binomial GLM (logistic regression) on the KDE-filter node-1 data via train().
# Resampling follows fitControl (3-fold cross-validation per the printed summary),
# and models are compared on accuracy.
# NOTE(review): the recorded run warns about non-convergence, fitted probabilities of
# 0 or 1, and a rank-deficient fit. Likely cause: every one-hot level is kept alongside
# an intercept -- confirm before interpreting coefficients.
Adult_TDA_KDE_5.50.5_n1_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
print(Adult_TDA_KDE_5.50.5_n1_LrFit0)
## Generalized Linear Model 
## 
## 13387 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8924, 8924, 8926 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8591161  0.6172397
# Show the per-fold resampling results of the node-1 model.
print(Adult_TDA_KDE_5.50.5_n1_LrFit0[["resample"]])
##    Accuracy     Kappa Resample
## 1 0.8579431 0.6154375    Fold1
## 2 0.8664575 0.6335638    Fold2
## 3 0.8529478 0.6027178    Fold3
# Keep only the Accuracy column (still a one-column data frame) for the fold-wise tests.
ad_tda_kde_5.50.5_n1_lr_fit_re <-
  Adult_TDA_KDE_5.50.5_n1_LrFit0[["resample"]]["Accuracy"]

# Coefficient table of the final fitted GLM
summary(Adult_TDA_KDE_5.50.5_n1_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -2.158e+13  1.510e+13  -1.429 0.153005    
## V1                              1.441e-02  2.451e-03   5.878 4.15e-09 ***
## V2..                            2.158e+13  1.510e+13   1.429 0.153005    
## V2.Federal.gov                  2.158e+13  1.510e+13   1.429 0.153005    
## V2.Local.gov                    2.158e+13  1.510e+13   1.429 0.153005    
## V2.Never.worked                 2.158e+13  1.510e+13   1.429 0.153005    
## V2.Private                      2.158e+13  1.510e+13   1.429 0.153005    
## V2.Self.emp.inc                 2.158e+13  1.510e+13   1.429 0.153005    
## V2.Self.emp.not.inc             2.158e+13  1.510e+13   1.429 0.153005    
## V2.State.gov                    2.158e+13  1.510e+13   1.429 0.153005    
## V2.Without.pay                  2.158e+13  1.510e+13   1.429 0.153005    
## V3                              6.893e-07  2.127e-07   3.241 0.001190 ** 
## V4.10th                        -1.169e+00  1.707e-01  -6.848 7.49e-12 ***
## V4.11th                        -1.106e+00  1.708e-01  -6.476 9.41e-11 ***
## V4.12th                        -8.562e-01  3.188e-01  -2.686 0.007236 ** 
## V4.1st.4th                     -1.741e+00  4.843e-01  -3.595 0.000325 ***
## V4.5th.6th                     -1.482e+00  3.069e-01  -4.828 1.38e-06 ***
## V4.7th.8th                     -1.618e+00  1.957e-01  -8.268  < 2e-16 ***
## V4.9th                         -1.325e+00  2.285e-01  -5.797 6.76e-09 ***
## V4.Assoc.acdm                   1.387e-01  1.601e-01   0.866 0.386287    
## V4.Assoc.voc                    1.032e-01  1.585e-01   0.651 0.515109    
## V4.Bachelors                    5.937e-01  9.984e-02   5.947 2.74e-09 ***
## V4.Doctorate                    1.835e+00  1.768e-01  10.379  < 2e-16 ***
## V4.HS.grad                     -4.623e-01  9.987e-02  -4.629 3.67e-06 ***
## V4.Masters                      1.019e+00  1.205e-01   8.462  < 2e-16 ***
## V4.Preschool                   -3.217e+01  6.329e+04  -0.001 0.999594    
## V4.Prof.school                  1.589e+00  1.577e-01  10.082  < 2e-16 ***
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -2.860e-01  1.855e-01  -1.542 0.122995    
## V6.Married.AF.spouse            2.264e+00  8.805e-01   2.571 0.010128 *  
## V6.Married.civ.spouse           1.810e+00  4.553e-01   3.976 7.01e-05 ***
## V6.Married.spouse.absent       -5.586e-01  3.806e-01  -1.468 0.142147    
## V6.Never.married               -8.128e-01  1.962e-01  -4.143 3.43e-05 ***
## V6.Separated                   -3.280e-01  2.735e-01  -1.199 0.230452    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                -2.085e-01  1.650e-01  -1.263 0.206432    
## V7.Armed.Forces                -1.677e-01  2.205e+00  -0.076 0.939391    
## V7.Craft.repair                 7.394e-02  1.413e-01   0.523 0.600816    
## V7.Exec.managerial              7.304e-01  1.424e-01   5.130 2.90e-07 ***
## V7.Farming.fishing             -1.036e+00  2.129e-01  -4.866 1.14e-06 ***
## V7.Handlers.cleaners           -8.105e-01  2.508e-01  -3.232 0.001231 ** 
## V7.Machine.op.inspct           -6.159e-01  1.917e-01  -3.213 0.001312 ** 
## V7.Other.service               -1.049e+00  2.076e-01  -5.053 4.35e-07 ***
## V7.Priv.house.serv             -2.332e+01  2.443e+04  -0.001 0.999238    
## V7.Prof.specialty               3.653e-01  1.491e-01   2.450 0.014300 *  
## V7.Protective.serv              1.828e-01  2.277e-01   0.803 0.422010    
## V7.Sales                        1.044e-01  1.498e-01   0.697 0.485734    
## V7.Tech.support                 5.809e-01  2.033e-01   2.857 0.004278 ** 
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -1.270e+00  1.565e-01  -8.118 4.75e-16 ***
## V8.Not.in.family               -6.745e-01  4.429e-01  -1.523 0.127787    
## V8.Other.relative              -1.488e+00  4.241e-01  -3.509 0.000450 ***
## V8.Own.child                   -1.820e+00  4.592e-01  -3.964 7.36e-05 ***
## V8.Unmarried                   -6.297e-01  4.557e-01  -1.382 0.166968    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo          -1.743e-01  3.185e-01  -0.547 0.584288    
## V9.Asian.Pac.Islander           1.859e-01  2.667e-01   0.697 0.485786    
## V9.Black                       -9.233e-02  1.164e-01  -0.793 0.427751    
## V9.Other                        4.619e-01  3.857e-01   1.198 0.231052    
## V9.White                               NA         NA      NA       NA    
## V10.Female                     -8.920e-01  1.133e-01  -7.872 3.49e-15 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             3.005e-04  1.528e-05  19.668  < 2e-16 ***
## V12                             6.511e-04  5.583e-05  11.662  < 2e-16 ***
## V13                             3.122e-02  2.322e-03  13.445  < 2e-16 ***
## V14..                          -1.280e+00  1.212e+00  -1.056 0.290857    
## V14.Cambodia                    3.755e-01  1.680e+00   0.223 0.823174    
## V14.Canada                     -9.345e-01  1.246e+00  -0.750 0.453395    
## V14.China                      -2.114e+00  1.306e+00  -1.619 0.105527    
## V14.Columbia                   -2.620e+00  1.497e+00  -1.749 0.080212 .  
## V14.Cuba                       -1.492e+00  1.276e+00  -1.169 0.242393    
## V14.Dominican.Republic         -2.395e+01  4.126e+04  -0.001 0.999537    
## V14.Ecuador                    -2.571e+00  1.971e+00  -1.305 0.191940    
## V14.El.Salvador                -1.926e+00  1.451e+00  -1.327 0.184408    
## V14.England                    -1.037e+00  1.265e+00  -0.819 0.412585    
## V14.France                     -9.263e-01  1.358e+00  -0.682 0.495292    
## V14.Germany                    -6.693e-01  1.267e+00  -0.528 0.597199    
## V14.Greece                     -1.402e+00  1.463e+00  -0.958 0.338079    
## V14.Guatemala                  -1.357e+00  1.556e+00  -0.872 0.383148    
## V14.Haiti                      -2.065e+00  1.898e+00  -1.088 0.276500    
## V14.Holand.Netherlands         -2.328e+01  3.075e+05   0.000 0.999940    
## V14.Honduras                   -1.547e+00  3.232e+00  -0.479 0.632153    
## V14.Hong                       -6.725e-01  1.451e+00  -0.463 0.643089    
## V14.Hungary                    -5.827e-01  1.639e+00  -0.355 0.722223    
## V14.India                      -1.689e+00  1.283e+00  -1.317 0.187963    
## V14.Iran                       -1.879e+00  1.447e+00  -1.298 0.194142    
## V14.Ireland                    -5.711e-01  1.894e+00  -0.302 0.762982    
## V14.Italy                       5.082e-01  1.255e+00   0.405 0.685609    
## V14.Jamaica                    -2.496e+00  1.621e+00  -1.540 0.123581    
## V14.Japan                      -1.754e-01  1.370e+00  -0.128 0.898101    
## V14.Laos                       -7.329e-01  1.570e+00  -0.467 0.640724    
## V14.Mexico                     -1.573e+00  1.219e+00  -1.291 0.196677    
## V14.Nicaragua                  -1.716e+00  1.449e+00  -1.184 0.236327    
## V14.Outlying.US.Guam.USVI.etc. -2.564e+01  1.160e+05   0.000 0.999824    
## V14.Peru                       -1.265e+00  1.641e+00  -0.771 0.440959    
## V14.Philippines                -1.387e+00  1.263e+00  -1.098 0.272027    
## V14.Poland                     -7.809e-01  1.367e+00  -0.571 0.567687    
## V14.Portugal                   -2.234e+00  1.721e+00  -1.298 0.194220    
## V14.Puerto.Rico                -4.165e-01  1.305e+00  -0.319 0.749604    
## V14.Scotland                    2.571e-01  1.775e+00   0.145 0.884829    
## V14.South                      -2.715e+00  1.388e+00  -1.956 0.050460 .  
## V14.Taiwan                     -5.682e-01  1.398e+00  -0.406 0.684404    
## V14.Thailand                   -1.812e+00  1.906e+00  -0.951 0.341798    
## V14.Trinadad.Tobago            -1.127e+00  1.723e+00  -0.654 0.512878    
## V14.United.States              -9.105e-01  1.193e+00  -0.763 0.445470    
## V14.Vietnam                    -1.315e+00  1.463e+00  -0.899 0.368657    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 15320.8  on 13386  degrees of freedom
## Residual deviance:  8281.5  on 13287  degrees of freedom
## AIC: 8481.5
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-1 model, limited to the 50 top-ranked features.
# Title corrected: it now names the plotted object (LrFit0) and spells "Assisted".
vip(Adult_TDA_KDE_5.50.5_n1_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n1_LrFit0 TDA-Assisted LR")

# Score the test set (adult.one_hot_df4Test) with the node-1 model.
# pred0 is a scratch name that is reassigned for each node's model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n1_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Cross-tabulate the predictions against the observed test labels and print the result.
ad_tda_kde_5.50.5_n1_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_kde_5.50.5_n1_lr_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6966   980
##      >50K     450  1372
##                                           
##                Accuracy : 0.8536          
##                  95% CI : (0.8464, 0.8606)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5662          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9393          
##             Specificity : 0.5833          
##          Pos Pred Value : 0.8767          
##          Neg Pred Value : 0.7530          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7131          
##    Detection Prevalence : 0.8135          
##       Balanced Accuracy : 0.7613          
##                                           
##        'Positive' Class :  <=50K          
## 
# Prints the same confusion matrix a second time; the output below repeats the one above.
ad_tda_kde_5.50.5_n1_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6966   980
##      >50K     450  1372
##                                           
##                Accuracy : 0.8536          
##                  95% CI : (0.8464, 0.8606)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5662          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9393          
##             Specificity : 0.5833          
##          Pos Pred Value : 0.8767          
##          Neg Pred Value : 0.7530          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7131          
##    Detection Prevalence : 0.8135          
##       Balanced Accuracy : 0.7613          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-1 confusion matrix
print(ad_tda_kde_5.50.5_n1_lr_cf0[["overall"]])
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.536036e-01   5.662164e-01   8.464367e-01   8.605589e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  1.204810e-117   1.818713e-44
# Store the test-set accuracy (first element of the listing above), selected by name.
ad_tda_kde_5.50.5_n1_lr_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n1_lr_cf0[["overall"]]["Accuracy"]
# Per-class statistics
print(ad_tda_kde_5.50.5_n1_lr_cf0[["byClass"]])
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9393204            0.5833333            0.8766675 
##       Neg Pred Value            Precision               Recall 
##            0.7530187            0.8766675            0.9393204 
##                   F1           Prevalence       Detection Rate 
##            0.9069132            0.7592138            0.7131450 
## Detection Prevalence    Balanced Accuracy 
##            0.8134726            0.7613269
# Keep precision, recall and F1 (elements 5 to 7 of byClass in the listing above).
ad_tda_kde_5.50.5_n1_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n1_lr_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Per-fold accuracy difference: non-TDA LR minus TDA-assisted LR (KDE filter, node 1).
# A negative entry means the TDA-assisted model scored higher on that fold.
diff_tda_kde_5.50.5_lr_n1_3_fold <- ad_lr_fit_re - ad_tda_kde_5.50.5_n1_lr_fit_re
print(diff_tda_kde_5.50.5_lr_n1_3_fold)
##       Accuracy
## 1 -0.006817643
## 2 -0.017023478
## 3 -0.003382094
## Bayesian Tests 3-fold diff

# Bayesian sign test with a region of practical equivalence (ROPE) of [-0.01, 0.01]

bst_tda_kde_5.50.5_lr.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold), -0.01, 0.01
)
print(bst_tda_kde_5.50.5_lr.n1_3_fold)
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test.
# probRight is 0 in the recorded run, so the ratio evaluates to Inf.

bst_tda_kde_5.50.5_lr.n1_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n1_3_fold, probLeft / probRight
)
print(bst_tda_kde_5.50.5_lr.n1_3_fold_odds.left)
## [1] Inf
# Bayesian signed-rank test, same ROPE

bsr_tda_kde_5.50.5_lr.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold), -0.01, 0.01
)
print(bsr_tda_kde_5.50.5_lr.n1_3_fold)
## $winLeft
## [1] 0.3270667
## 
## $winRope
## [1] 0.6729333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test, same ROPE.
# The 0.1 is presumably the fold-correlation argument -- TODO confirm against the helper.

bct_tda_kde_5.50.5_lr.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold), 0.1, -0.01, 0.01
)
print(bct_tda_kde_5.50.5_lr.n1_3_fold)
## $left
## [1] 0.4314692
## 
## $rope
## [1] 0.5403568
## 
## $right
## [1] 0.02817394
# ROPE plot of the fold differences over the same [-0.01, 0.01] range
plot(rope(diff_tda_kde_5.50.5_lr_n1_3_fold, c(-0.01, 0.01)))

# BayesFactor t-test (disabled)
#bf_tda_kde_5.50.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold))
#bf_tda_kde_5.50.5_lr.n1_3_fold

# Frequentist one-sample t-test of the fold differences against zero.
# The call is left verbatim because its text is echoed in the "data:" line of the output.
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold)
## t = -2.2152, df = 2, p-value = 0.1571
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.026699808  0.008550997
## sample estimates:
##    mean of x 
## -0.009074405
### Test set diff
# Test-set accuracy difference: non-TDA LR minus TDA-assisted LR (KDE filter, node 1).
# The baseline is the LR test accuracy (lr_cf_ov_acc), consistent with the 3-fold
# comparison above, which subtracts from ad_lr_fit_re. The earlier code used the SVM
# baseline (svm_cf_ov_acc) here, a copy-paste slip that compared an SVM with an LR model.
# NOTE: the recorded outputs that follow were produced with the SVM baseline;
# re-knit the document to refresh them.
diff_tda_kde_5.50.5_lr.n1_test <- lr_cf_ov_acc - ad_tda_kde_5.50.5_n1_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n1_test
##    Accuracy 
## -0.02579853
## Bayesian Tests Test set diff

# Bayesian sign test on the test-set difference, ROPE of [-0.01, 0.01].
# The difference is a single number, so every test below runs on one observation.

bst_tda_kde_5.50.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n1_test), -0.01, 0.01
)
print(bst_tda_kde_5.50.5_lr.n1_test)
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test.
# probRight is 0 in the recorded run, so the ratio evaluates to Inf.

bst_tda_kde_5.50.5_lr.n1_test_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n1_test, probLeft / probRight
)
print(bst_tda_kde_5.50.5_lr.n1_test_odds.left)
## [1] Inf
# Bayesian signed-rank test, same ROPE

bsr_tda_kde_5.50.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n1_test), -0.01, 0.01
)
print(bsr_tda_kde_5.50.5_lr.n1_test)
## $winLeft
## [1] 0.8412667
## 
## $winRope
## [1] 0.1587333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test, same ROPE.
# The recorded result is NA for left/rope/right, presumably because one observation
# gives no variance estimate -- TODO confirm.

bct_tda_kde_5.50.5_lr.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n1_test), 0.1, -0.01, 0.01
)
print(bct_tda_kde_5.50.5_lr.n1_test)
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n1_test)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n1_test)) #bf_tda_kde_5.50.5_lr.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n1_test))


##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node2

# Fit a binomial GLM (logistic regression) on the KDE-filter node-2 data via train().
# Resampling follows fitControl (3-fold cross-validation per the printed summary),
# and models are compared on accuracy.
# NOTE(review): the recorded run warns about non-convergence, fitted probabilities of
# 0 or 1, and a rank-deficient fit. Likely cause: every one-hot level is kept alongside
# an intercept -- confirm before interpreting coefficients.
Adult_TDA_KDE_5.50.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
print(Adult_TDA_KDE_5.50.5_n2_LrFit0)
## Generalized Linear Model 
## 
## 12638 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8425, 8425, 8426 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8424592  0.5950167
# Show the per-fold resampling results of the node-2 model.
print(Adult_TDA_KDE_5.50.5_n2_LrFit0[["resample"]])
##    Accuracy     Kappa Resample
## 1 0.8438168 0.5961240    Fold1
## 2 0.8414432 0.5937312    Fold2
## 3 0.8421178 0.5951950    Fold3
# Keep only the Accuracy column (still a one-column data frame) for the fold-wise tests.
ad_tda_kde_5.50.5_n2_lr_fit_re <-
  Adult_TDA_KDE_5.50.5_n2_LrFit0[["resample"]]["Accuracy"]

# Coefficient table of the final fitted GLM
summary(Adult_TDA_KDE_5.50.5_n2_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (15 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     2.377e+12  5.353e+12   0.444 0.656994    
## V1                              4.019e-02  2.998e-03  13.407  < 2e-16 ***
## V2..                           -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Federal.gov                 -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Local.gov                   -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Never.worked                -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Private                     -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Self.emp.inc                -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Self.emp.not.inc            -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.State.gov                   -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Without.pay                 -2.377e+12  5.353e+12  -0.444 0.656994    
## V3                              1.103e-06  3.002e-07   3.673 0.000240 ***
## V4.10th                        -1.293e+00  6.071e-01  -2.129 0.033222 *  
## V4.11th                        -7.382e-01  2.070e-01  -3.566 0.000363 ***
## V4.12th                        -6.992e-01  3.145e-01  -2.223 0.026183 *  
## V4.1st.4th                             NA         NA      NA       NA    
## V4.5th.6th                             NA         NA      NA       NA    
## V4.7th.8th                             NA         NA      NA       NA    
## V4.9th                                 NA         NA      NA       NA    
## V4.Assoc.acdm                   2.497e-01  1.403e-01   1.779 0.075165 .  
## V4.Assoc.voc                    1.348e-01  1.431e-01   0.942 0.346329    
## V4.Bachelors                    9.199e-01  9.162e-02  10.040  < 2e-16 ***
## V4.Doctorate                           NA         NA      NA       NA    
## V4.HS.grad                     -4.506e-01  8.894e-02  -5.067 4.05e-07 ***
## V4.Masters                      1.352e+00  1.141e-01  11.857  < 2e-16 ***
## V4.Preschool                           NA         NA      NA       NA    
## V4.Prof.school                  2.484e+00  2.158e-01  11.509  < 2e-16 ***
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -4.044e-02  2.329e-01  -0.174 0.862155    
## V6.Married.AF.spouse            3.066e+00  9.421e-01   3.254 0.001137 ** 
## V6.Married.civ.spouse           2.088e+00  5.310e-01   3.932 8.43e-05 ***
## V6.Married.spouse.absent        2.967e-02  3.779e-01   0.079 0.937413    
## V6.Never.married               -5.803e-01  2.445e-01  -2.374 0.017602 *  
## V6.Separated                   -2.802e-01  3.178e-01  -0.882 0.377909    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 2.207e-02  1.627e-01   0.136 0.892080    
## V7.Armed.Forces                -2.415e+01  3.607e+05   0.000 0.999947    
## V7.Craft.repair                 1.250e-01  1.443e-01   0.867 0.386187    
## V7.Exec.managerial              8.952e-01  1.436e-01   6.236 4.49e-10 ***
## V7.Farming.fishing             -6.954e-01  2.187e-01  -3.180 0.001473 ** 
## V7.Handlers.cleaners           -4.394e-01  2.579e-01  -1.704 0.088433 .  
## V7.Machine.op.inspct           -4.405e-01  1.941e-01  -2.270 0.023203 *  
## V7.Other.service               -1.026e+00  2.122e-01  -4.836 1.32e-06 ***
## V7.Priv.house.serv             -3.433e+00  2.526e+00  -1.359 0.174160    
## V7.Prof.specialty               5.321e-01  1.509e-01   3.527 0.000420 ***
## V7.Protective.serv              7.123e-01  2.190e-01   3.253 0.001142 ** 
## V7.Sales                        2.879e-01  1.495e-01   1.926 0.054125 .  
## V7.Tech.support                 5.911e-01  1.928e-01   3.066 0.002173 ** 
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -1.440e+00  1.490e-01  -9.669  < 2e-16 ***
## V8.Not.in.family               -8.639e-01  4.964e-01  -1.740 0.081799 .  
## V8.Other.relative              -1.702e+00  4.234e-01  -4.021 5.81e-05 ***
## V8.Own.child                   -1.897e+00  5.021e-01  -3.777 0.000159 ***
## V8.Unmarried                   -9.347e-01  5.071e-01  -1.843 0.065260 .  
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo          -5.421e-03  3.121e-01  -0.017 0.986139    
## V9.Asian.Pac.Islander          -1.005e-01  2.368e-01  -0.425 0.671122    
## V9.Black                        2.333e-03  1.215e-01   0.019 0.984688    
## V9.Other                        1.334e-01  4.064e-01   0.328 0.742657    
## V9.White                               NA         NA      NA       NA    
## V10.Female                     -8.916e-01  1.120e-01  -7.960 1.72e-15 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             3.295e-04  1.625e-05  20.279  < 2e-16 ***
## V12                             7.852e-04  6.169e-05  12.727  < 2e-16 ***
## V13                             2.469e-02  2.664e-03   9.267  < 2e-16 ***
## V14..                          -4.861e-01  9.129e-01  -0.532 0.594403    
## V14.Cambodia                    1.803e+00  1.339e+00   1.346 0.178193    
## V14.Canada                     -5.672e-02  9.926e-01  -0.057 0.954429    
## V14.China                      -6.112e-01  1.074e+00  -0.569 0.569260    
## V14.Columbia                   -2.500e+01  6.303e+04   0.000 0.999684    
## V14.Cuba                        4.934e-02  1.016e+00   0.049 0.961261    
## V14.Dominican.Republic         -2.431e+01  6.589e+04   0.000 0.999706    
## V14.Ecuador                    -1.085e+00  1.642e+00  -0.661 0.508921    
## V14.El.Salvador                -2.494e-01  1.173e+00  -0.213 0.831555    
## V14.England                     5.221e-01  9.807e-01   0.532 0.594445    
## V14.France                      6.892e-01  1.203e+00   0.573 0.566757    
## V14.Germany                     7.369e-01  9.650e-01   0.764 0.445097    
## V14.Greece                     -1.341e+00  1.203e+00  -1.115 0.264954    
## V14.Guatemala                  -7.413e-01  2.216e+00  -0.334 0.738034    
## V14.Haiti                      -5.211e-01  1.222e+00  -0.427 0.669732    
## V14.Holand.Netherlands         -2.230e+01  3.364e+05   0.000 0.999947    
## V14.Honduras                   -2.309e+01  1.684e+05   0.000 0.999891    
## V14.Hong                        1.743e+00  1.363e+00   1.278 0.201085    
## V14.Hungary                     9.291e-01  1.599e+00   0.581 0.561302    
## V14.India                      -3.241e-01  9.738e-01  -0.333 0.739287    
## V14.Iran                       -9.642e-02  1.249e+00  -0.077 0.938485    
## V14.Ireland                    -2.406e+01  1.255e+05   0.000 0.999847    
## V14.Italy                       9.493e-01  1.030e+00   0.921 0.356945    
## V14.Jamaica                    -1.946e+00  1.368e+00  -1.423 0.154862    
## V14.Japan                       4.816e-01  1.096e+00   0.440 0.660258    
## V14.Laos                       -2.472e+01  1.183e+05   0.000 0.999833    
## V14.Mexico                     -6.107e-01  9.836e-01  -0.621 0.534691    
## V14.Nicaragua                  -2.426e+01  8.981e+04   0.000 0.999785    
## V14.Outlying.US.Guam.USVI.etc. -2.428e+01  1.889e+05   0.000 0.999897    
## V14.Peru                       -3.972e-01  1.436e+00  -0.277 0.782121    
## V14.Philippines                 6.763e-01  9.607e-01   0.704 0.481468    
## V14.Poland                      3.535e-01  1.087e+00   0.325 0.745094    
## V14.Portugal                   -2.412e+01  1.266e+05   0.000 0.999848    
## V14.Puerto.Rico                -3.442e-01  1.140e+00  -0.302 0.762585    
## V14.Scotland                    8.816e-03  1.404e+00   0.006 0.994991    
## V14.South                      -1.041e+00  1.069e+00  -0.974 0.330195    
## V14.Taiwan                      1.561e-01  1.085e+00   0.144 0.885539    
## V14.Thailand                   -8.911e-01  1.498e+00  -0.595 0.551914    
## V14.Trinadad.Tobago            -3.699e-01  1.763e+00  -0.210 0.833792    
## V14.United.States               1.391e-01  8.881e-01   0.157 0.875548    
## V14.Vietnam                    -1.970e+00  1.455e+00  -1.354 0.175700    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 14967.3  on 12637  degrees of freedom
## Residual deviance:  8297.9  on 12544  degrees of freedom
## AIC: 8485.9
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-2 model, limited to the 50 top-ranked features.
# Title corrected: it now names the plotted object (LrFit0) and spells "Assisted".
vip(Adult_TDA_KDE_5.50.5_n2_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n2_LrFit0 TDA-Assisted LR")

# Score the test set (adult.one_hot_df4Test) with the node-2 model.
# pred0 is a scratch name that is reassigned for each node's model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n2_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Cross-tabulate the predictions against the observed test labels and print the result.
ad_tda_kde_5.50.5_n2_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_kde_5.50.5_n2_lr_cf0)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6907   966
##      >50K     509  1386
##                                          
##                Accuracy : 0.849          
##                  95% CI : (0.8417, 0.856)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.5576         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9314         
##             Specificity : 0.5893         
##          Pos Pred Value : 0.8773         
##          Neg Pred Value : 0.7314         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7071         
##    Detection Prevalence : 0.8060         
##       Balanced Accuracy : 0.7603         
##                                          
##        'Positive' Class :  <=50K         
## 
# Prints the same confusion matrix a second time; the output below repeats the one above.
ad_tda_kde_5.50.5_n2_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6907   966
##      >50K     509  1386
##                                          
##                Accuracy : 0.849          
##                  95% CI : (0.8417, 0.856)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.5576         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.9314         
##             Specificity : 0.5893         
##          Pos Pred Value : 0.8773         
##          Neg Pred Value : 0.7314         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7071         
##    Detection Prevalence : 0.8060         
##       Balanced Accuracy : 0.7603         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall statistics of the node-2 confusion matrix
print(ad_tda_kde_5.50.5_n2_lr_cf0[["overall"]])
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.489967e-01   5.576442e-01   8.417411e-01   8.560436e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  5.652237e-106   1.630512e-32
# Store the test-set accuracy (first element of the listing above), selected by name.
ad_tda_kde_5.50.5_n2_lr_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n2_lr_cf0[["overall"]]["Accuracy"]
# Per-class statistics
print(ad_tda_kde_5.50.5_n2_lr_cf0[["byClass"]])
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9313646            0.5892857            0.8773022 
##       Neg Pred Value            Precision               Recall 
##            0.7313984            0.8773022            0.9313646 
##                   F1           Prevalence       Detection Rate 
##            0.9035254            0.7592138            0.7071048 
## Detection Prevalence    Balanced Accuracy 
##            0.8059992            0.7603252
# Keep precision, recall and F1 (elements 5 to 7 of byClass in the listing above).
ad_tda_kde_5.50.5_n2_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n2_lr_cf0[["byClass"]][c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Per-fold accuracy difference: non-TDA LR minus TDA-assisted LR (KDE filter, node 2).
# A positive entry means the non-TDA model scored higher on that fold.
diff_tda_kde_5.50.5_lr_n2_3_fold <- ad_lr_fit_re - ad_tda_kde_5.50.5_n2_lr_fit_re
print(diff_tda_kde_5.50.5_lr_n2_3_fold)
##      Accuracy
## 1 0.007308687
## 2 0.007990909
## 3 0.007447916
## Bayesian Tests 3-fold diff

# Bayesian sign test with a region of practical equivalence (ROPE) of [-0.01, 0.01]

bst_tda_kde_5.50.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold), -0.01, 0.01
)
print(bst_tda_kde_5.50.5_lr.n2_3_fold)
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test.
# Both probabilities are 0 in the recorded run, so the ratio is 0/0 = NaN.

bst_tda_kde_5.50.5_lr.n2_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n2_3_fold, probLeft / probRight
)
print(bst_tda_kde_5.50.5_lr.n2_3_fold_odds.left)
## [1] NaN
# Bayesian signed-rank test, same ROPE

bsr_tda_kde_5.50.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold), -0.01, 0.01
)
print(bsr_tda_kde_5.50.5_lr.n2_3_fold)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian correlated t-test, same ROPE.
# The 0.1 is presumably the fold-correlation argument -- TODO confirm against the helper.

bct_tda_kde_5.50.5_lr.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold), 0.1, -0.01, 0.01
)
print(bct_tda_kde_5.50.5_lr.n2_3_fold)
## $left
## [1] 9.338029e-05
## 
## $rope
## [1] 0.9950378
## 
## $right
## [1] 0.004868857
# ROPE plot of the fold differences over the same [-0.01, 0.01] range
plot(rope(diff_tda_kde_5.50.5_lr_n2_3_fold, c(-0.01, 0.01)))

# BayesFactor t-test (disabled)
#bf_tda_kde_5.50.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold))
#bf_tda_kde_5.50.5_lr.n2_3_fold

# Frequentist one-sample t-test of the fold differences against zero.
# The call is left verbatim because its text is echoed in the "data:" line of the output.
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold)
## t = 36.433, df = 2, p-value = 0.0007525
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.006687034 0.008477974
## sample estimates:
##   mean of x 
## 0.007582504
### Test set diff
# NOTE(review): the baseline here is svm_cf_ov_acc although this section compares LR
# models (the 3-fold diff uses ad_lr_fit_re) -- confirm the intended baseline accuracy.
diff_tda_kde_5.50.5_lr.n2_test <- svm_cf_ov_acc - ad_tda_kde_5.50.5_n2_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n2_test
##    Accuracy 
## -0.02119165
## Bayesian Tests Test set diff

# Bayesian Sign Test
# Input is the single test-set accuracy difference (a 1x1 matrix).
bst_tda_kde_5.50.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_lr.n2_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; evaluates to NaN (0/0) or Inf when probRight is 0.
bst_tda_kde_5.50.5_lr.n2_test_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n2_test,
  probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n2_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Input is the single test-set accuracy difference (a 1x1 matrix).
bsr_tda_kde_5.50.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_lr.n2_test
## $winLeft
## [1] 0.8423333
## 
## $winRope
## [1] 0.1576667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# With a single difference this returns NA for left/rope/right.
bct_tda_kde_5.50.5_lr.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_lr.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n2_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n2_test)) #bf_tda_pca_5.50.5_lr.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n2_test))

##Node3

# Logistic regression (caret method "glm", binomial family) on the node-3 data,
# resampled via fitControl and scored on Accuracy.
# NOTE(review): this fit emits "algorithm did not converge" and rank-deficiency
# warnings; the one-hot predictors are presumably collinear -- confirm.
Adult_TDA_KDE_5.50.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.50.5_n3_LrFit0
## Generalized Linear Model 
## 
## 11634 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7757, 7756, 7755 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8343636  0.5730016
Adult_TDA_KDE_5.50.5_n3_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8279598 0.5554974    Fold1
## 2 0.8334193 0.5727216    Fold2
## 3 0.8417118 0.5907857    Fold3
# Per-fold CV accuracy of the node-3 fit (one-column data frame), used by the
# 3-fold comparison against ad_lr_fit_re.
# Fixed: this previously read Adult_TDA_KDE_5.50.5_n2_LrFit0$resample, so the n3
# comparison silently reused the node-2 accuracies. Any rendered results derived
# from this variable must be regenerated.
ad_tda_kde_5.50.5_n3_lr_fit_re <- Adult_TDA_KDE_5.50.5_n3_LrFit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.50.5_n3_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (18 not defined because of singularities)
##                                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)                    -1.210e+13  1.132e+13   -1.068 0.285486    
## V1                              5.716e-02  3.707e-03   15.419  < 2e-16 ***
## V2..                            1.210e+13  1.132e+13    1.068 0.285486    
## V2.Federal.gov                  1.210e+13  1.132e+13    1.068 0.285486    
## V2.Local.gov                    1.210e+13  1.132e+13    1.068 0.285486    
## V2.Never.worked                -4.492e+15  1.132e+13 -396.606  < 2e-16 ***
## V2.Private                      1.210e+13  1.132e+13    1.068 0.285486    
## V2.Self.emp.inc                 1.210e+13  1.132e+13    1.068 0.285486    
## V2.Self.emp.not.inc             1.210e+13  1.132e+13    1.068 0.285486    
## V2.State.gov                    1.210e+13  1.132e+13    1.068 0.285486    
## V2.Without.pay                  1.210e+13  1.132e+13    1.068 0.285486    
## V3                              1.058e-06  4.265e-07    2.480 0.013122 *  
## V4.10th                                NA         NA       NA       NA    
## V4.11th                        -2.141e+01  1.391e+05    0.000 0.999877    
## V4.12th                        -3.019e-01  3.262e-01   -0.926 0.354645    
## V4.1st.4th                             NA         NA       NA       NA    
## V4.5th.6th                             NA         NA       NA       NA    
## V4.7th.8th                             NA         NA       NA       NA    
## V4.9th                                 NA         NA       NA       NA    
## V4.Assoc.acdm                   4.043e-01  1.394e-01    2.901 0.003723 ** 
## V4.Assoc.voc                    3.090e-01  1.336e-01    2.313 0.020710 *  
## V4.Bachelors                    1.133e+00  9.265e-02   12.228  < 2e-16 ***
## V4.Doctorate                           NA         NA       NA       NA    
## V4.HS.grad                     -2.567e-01  8.674e-02   -2.959 0.003087 ** 
## V4.Masters                      1.542e+00  1.396e-01   11.048  < 2e-16 ***
## V4.Preschool                           NA         NA       NA       NA    
## V4.Prof.school                         NA         NA       NA       NA    
## V4.Some.college                        NA         NA       NA       NA    
## V5                                     NA         NA       NA       NA    
## V6.Divorced                    -2.761e-02  3.077e-01   -0.090 0.928514    
## V6.Married.AF.spouse            3.429e+00  1.082e+00    3.170 0.001525 ** 
## V6.Married.civ.spouse           2.257e+00  5.151e-01    4.382 1.18e-05 ***
## V6.Married.spouse.absent        1.001e-01  4.357e-01    0.230 0.818365    
## V6.Never.married               -3.595e-01  3.181e-01   -1.130 0.258384    
## V6.Separated                   -2.111e-01  3.891e-01   -0.543 0.587345    
## V6.Widowed                             NA         NA       NA       NA    
## V7..                                   NA         NA       NA       NA    
## V7.Adm.clerical                 2.406e-01  1.626e-01    1.480 0.139002    
## V7.Armed.Forces                -2.416e+01  1.729e+05    0.000 0.999888    
## V7.Craft.repair                 1.624e-01  1.439e-01    1.129 0.258979    
## V7.Exec.managerial              1.016e+00  1.458e-01    6.970 3.17e-12 ***
## V7.Farming.fishing             -8.835e-01  2.453e-01   -3.602 0.000316 ***
## V7.Handlers.cleaners           -4.339e-01  2.492e-01   -1.741 0.081600 .  
## V7.Machine.op.inspct           -5.824e-02  1.761e-01   -0.331 0.740869    
## V7.Other.service               -6.660e-01  2.097e-01   -3.176 0.001495 ** 
## V7.Priv.house.serv             -3.271e+00  2.440e+00   -1.341 0.180045    
## V7.Prof.specialty               6.993e-01  1.540e-01    4.541 5.60e-06 ***
## V7.Protective.serv              1.032e+00  2.144e-01    4.812 1.49e-06 ***
## V7.Sales                        6.145e-01  1.496e-01    4.106 4.02e-05 ***
## V7.Tech.support                 7.322e-01  1.889e-01    3.876 0.000106 ***
## V7.Transport.moving                    NA         NA       NA       NA    
## V8.Husband                     -1.456e+00  1.601e-01   -9.091  < 2e-16 ***
## V8.Not.in.family               -9.264e-01  4.390e-01   -2.110 0.034857 *  
## V8.Other.relative              -1.820e+00  3.930e-01   -4.629 3.67e-06 ***
## V8.Own.child                   -2.250e+00  4.251e-01   -5.293 1.20e-07 ***
## V8.Unmarried                   -1.128e+00  4.525e-01   -2.493 0.012654 *  
## V8.Wife                                NA         NA       NA       NA    
## V9.Amer.Indian.Eskimo          -4.874e-01  3.861e-01   -1.262 0.206910    
## V9.Asian.Pac.Islander           4.647e-02  2.241e-01    0.207 0.835706    
## V9.Black                       -1.506e-01  1.218e-01   -1.237 0.216218    
## V9.Other                       -6.531e-01  4.634e-01   -1.409 0.158715    
## V9.White                               NA         NA       NA       NA    
## V10.Female                     -8.174e-01  1.260e-01   -6.489 8.64e-11 ***
## V10.Male                               NA         NA       NA       NA    
## V11                             3.215e-04  1.673e-05   19.219  < 2e-16 ***
## V12                             6.916e-04  6.360e-05   10.875  < 2e-16 ***
## V13                             2.419e-02  2.892e-03    8.366  < 2e-16 ***
## V14..                          -4.034e-01  1.001e+00   -0.403 0.687010    
## V14.Cambodia                    1.621e+00  1.309e+00    1.239 0.215505    
## V14.Canada                      7.824e-01  1.077e+00    0.726 0.467623    
## V14.China                      -9.027e-01  1.176e+00   -0.768 0.442732    
## V14.Columbia                   -2.441e+01  5.758e+04    0.000 0.999662    
## V14.Cuba                        1.205e+00  1.123e+00    1.073 0.283390    
## V14.Dominican.Republic         -7.977e-01  1.486e+00   -0.537 0.591305    
## V14.Ecuador                    -1.104e+00  1.607e+00   -0.687 0.492121    
## V14.El.Salvador                 8.271e-02  1.280e+00    0.065 0.948471    
## V14.England                     4.644e-01  1.095e+00    0.424 0.671504    
## V14.France                      1.409e+00  1.308e+00    1.078 0.281142    
## V14.Germany                     4.124e-01  1.048e+00    0.394 0.693942    
## V14.Greece                     -1.840e+00  1.297e+00   -1.419 0.155846    
## V14.Guatemala                  -2.407e+01  9.554e+04    0.000 0.999799    
## V14.Haiti                      -6.195e-02  1.258e+00   -0.049 0.960732    
## V14.Holand.Netherlands                 NA         NA       NA       NA    
## V14.Honduras                   -2.330e+01  1.741e+05    0.000 0.999893    
## V14.Hong                       -2.201e+01  2.388e+05    0.000 0.999926    
## V14.Hungary                    -7.904e-01  1.531e+00   -0.516 0.605591    
## V14.India                      -4.675e-01  1.080e+00   -0.433 0.665103    
## V14.Iran                        1.600e-02  1.127e+00    0.014 0.988670    
## V14.Ireland                     1.424e+00  1.373e+00    1.037 0.299660    
## V14.Italy                      -6.461e-02  1.132e+00   -0.057 0.954474    
## V14.Jamaica                     1.388e-02  1.222e+00    0.011 0.990940    
## V14.Japan                       4.253e-01  1.167e+00    0.364 0.715511    
## V14.Laos                       -2.419e+01  1.483e+05    0.000 0.999870    
## V14.Mexico                     -7.942e-01  1.153e+00   -0.689 0.490896    
## V14.Nicaragua                  -2.163e+01  1.614e+05    0.000 0.999893    
## V14.Outlying.US.Guam.USVI.etc. -2.304e+01  1.849e+05    0.000 0.999901    
## V14.Peru                       -1.213e+00  1.633e+00   -0.743 0.457765    
## V14.Philippines                 1.096e+00  1.052e+00    1.042 0.297548    
## V14.Poland                     -1.086e-01  1.139e+00   -0.095 0.923996    
## V14.Portugal                    5.104e-01  1.356e+00    0.376 0.706689    
## V14.Puerto.Rico                -1.235e+00  1.178e+00   -1.048 0.294454    
## V14.Scotland                   -5.170e-01  1.647e+00   -0.314 0.753623    
## V14.South                      -1.024e+00  1.172e+00   -0.873 0.382456    
## V14.Taiwan                     -4.563e-01  1.221e+00   -0.374 0.708564    
## V14.Thailand                   -8.318e-01  1.654e+00   -0.503 0.614977    
## V14.Trinadad.Tobago            -4.545e-01  1.754e+00   -0.259 0.795488    
## V14.United.States               1.502e-01  9.798e-01    0.153 0.878157    
## V14.Vietnam                    -2.492e+00  1.491e+00   -1.671 0.094637 .  
## V14.Yugoslavia                         NA         NA       NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 13708.0  on 11633  degrees of freedom
## Residual deviance:  7850.4  on 11543  degrees of freedom
## AIC: 8032.4
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-3 model; 50 is vip's second positional
# argument (presumably the number of features shown). Title typo "Assited" fixed.
vip(Adult_TDA_KDE_5.50.5_n3_LrFit0, 50) + ggtitle("Adult_TDA_KDE_5.50.5_n3_Lr1Fit TDA-Assisted LR")

# Predict outcome using Adult_TDA_KDE_5.50.5_n3_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n3_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-3 predictions (pred0) against the test labels
ad_tda_kde_5.50.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n3_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6764   905
##      >50K     652  1447
##                                           
##                Accuracy : 0.8406          
##                  95% CI : (0.8332, 0.8478)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5474          
##                                           
##  Mcnemar's Test P-Value : 1.698e-10       
##                                           
##             Sensitivity : 0.9121          
##             Specificity : 0.6152          
##          Pos Pred Value : 0.8820          
##          Neg Pred Value : 0.6894          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6925          
##    Detection Prevalence : 0.7851          
##       Balanced Accuracy : 0.7637          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n3_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6764   905
##      >50K     652  1447
##                                           
##                Accuracy : 0.8406          
##                  95% CI : (0.8332, 0.8478)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5474          
##                                           
##  Mcnemar's Test P-Value : 1.698e-10       
##                                           
##             Sensitivity : 0.9121          
##             Specificity : 0.6152          
##          Pos Pred Value : 0.8820          
##          Neg Pred Value : 0.6894          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6925          
##    Detection Prevalence : 0.7851          
##       Balanced Accuracy : 0.7637          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n3_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.406020e-01   5.474073e-01   8.331903e-01   8.478099e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.696017e-86   1.698345e-10
# Overall test-set accuracy (first element of $overall) of the node-3 model.
ad_tda_kde_5.50.5_n3_lr_cf0_ov_acc <- ad_tda_kde_5.50.5_n3_lr_cf0$overall["Accuracy"]
# Per-class metrics of the same confusion matrix.
ad_tda_kde_5.50.5_n3_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9120820            0.6152211            0.8819924 
##       Neg Pred Value            Precision               Recall 
##            0.6893759            0.8819924            0.9120820 
##                   F1           Prevalence       Detection Rate 
##            0.8967849            0.7592138            0.6924652 
## Detection Prevalence    Balanced Accuracy 
##            0.7851147            0.7636515
# Keep Precision, Recall and F1 (elements 5:7 of byClass) for the node-3 model.
ad_tda_kde_5.50.5_n3_lr_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]


###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Per-fold accuracy difference: ad_lr_fit_re (presumably the non-TDA LR) minus the node-3 resample accuracies.
diff_tda_kde_5.50.5_lr_n3_3_fold <- ad_lr_fit_re - ad_tda_kde_5.50.5_n3_lr_fit_re
diff_tda_kde_5.50.5_lr_n3_3_fold
##      Accuracy
## 1 0.007308687
## 2 0.007990909
## 3 0.007447916
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# -0.01 / 0.01 are presumably the ROPE bounds (same interval as the rope plot) -- confirm against BayesianSignTest.
bst_tda_kde_5.50.5_lr.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_lr.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; evaluates to NaN (0/0) or Inf when probRight is 0.
bst_tda_kde_5.50.5_lr.n3_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n3_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n3_3_fold_odds.left
## [1] NaN
# Bayesian Signed Rank Test
# Same per-fold differences and (presumably ROPE) limits as the sign test.
bsr_tda_kde_5.50.5_lr.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_lr.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Second argument 0.1 is presumably the fold-correlation parameter -- confirm against correlatedBayesianTtest.
bct_tda_kde_5.50.5_lr.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_lr.n3_3_fold
## $left
## [1] 9.338029e-05
## 
## $rope
## [1] 0.9950378
## 
## $right
## [1] 0.004868857
# Rope Plot: share of the fold differences inside [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.50.5_lr_n3_3_fold, c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_kde_5.50.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold))
#bf_tda_kde_5.50.5_lr.n3_3_fold

# One-sample t-test of the fold differences against a mean of 0
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold)
## t = 36.433, df = 2, p-value = 0.0007525
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.006687034 0.008477974
## sample estimates:
##   mean of x 
## 0.007582504
### Test set diff
# NOTE(review): the baseline here is svm_cf_ov_acc although this section compares LR
# models (the 3-fold diff uses ad_lr_fit_re) -- confirm the intended baseline accuracy.
diff_tda_kde_5.50.5_lr.n3_test <- svm_cf_ov_acc - ad_tda_kde_5.50.5_n3_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n3_test
##    Accuracy 
## -0.01279689
## Bayesian Tests Test set diff

# Bayesian Sign Test
# Input is the single test-set accuracy difference (a 1x1 matrix).
bst_tda_kde_5.50.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_lr.n3_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; evaluates to NaN (0/0) or Inf when probRight is 0.
bst_tda_kde_5.50.5_lr.n3_test_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n3_test,
  probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n3_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Fixed: this step previously re-ran the test on diff_tda_kde_5.50.5_lr.n2_test and
# overwrote bsr_tda_kde_5.50.5_lr.n2_test, so no node-3 result was ever produced.
# It now uses the node-3 test-set difference and stores it under the n3 name;
# the rendered output for this step must be regenerated.
bsr_tda_kde_5.50.5_lr.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_lr.n3_test
## $winLeft
## [1] 0.8408
## 
## $winRope
## [1] 0.1592
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# With a single difference this returns NA for left/rope/right.
bct_tda_kde_5.50.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_lr.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n3_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n3_test)) #bf_tda_pca_5.50.5_lr.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n3_test))

##Node4

# Logistic regression (caret method "glm", binomial family) on the node-4 data,
# resampled via fitControl and scored on Accuracy.
# NOTE(review): this fit emits "algorithm did not converge" and rank-deficiency
# warnings; the one-hot predictors are presumably collinear -- confirm.
Adult_TDA_KDE_5.50.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n4.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.50.5_n4_LrFit0
## Generalized Linear Model 
## 
## 10038 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6692, 6691, 6693 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8565451  0.5372228
Adult_TDA_KDE_5.50.5_n4_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8607292 0.5522725    Fold1
## 2 0.8547953 0.5272775    Fold2
## 3 0.8541106 0.5321185    Fold3
# Per-fold CV accuracy of the node-4 fit (one-column data frame), used by the
# 3-fold comparison against ad_lr_fit_re.
ad_tda_kde_5.50.5_n4_lr_fit_re <- Adult_TDA_KDE_5.50.5_n4_LrFit0$resample["Accuracy"]

summary(Adult_TDA_KDE_5.50.5_n4_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (19 not defined because of singularities)
##                                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)                    -1.260e+12  1.517e+13   -0.083 0.933819    
## V1                              7.282e-02  4.998e-03   14.569  < 2e-16 ***
## V2..                            7.797e+12  2.097e+13    0.372 0.710086    
## V2.Federal.gov                  7.797e+12  2.096e+13    0.372 0.709945    
## V2.Local.gov                    7.797e+12  2.098e+13    0.372 0.710183    
## V2.Never.worked                -4.496e+15  2.096e+13 -214.464  < 2e-16 ***
## V2.Private                      7.797e+12  2.097e+13    0.372 0.710089    
## V2.Self.emp.inc                 7.797e+12  2.095e+13    0.372 0.709801    
## V2.Self.emp.not.inc             7.797e+12  2.096e+13    0.372 0.709943    
## V2.State.gov                    7.797e+12  2.096e+13    0.372 0.709964    
## V2.Without.pay                 -4.496e+15  2.099e+13 -214.207  < 2e-16 ***
## V3                              1.580e-06  6.297e-07    2.509 0.012114 *  
## V4.10th                                NA         NA       NA       NA    
## V4.11th                                NA         NA       NA       NA    
## V4.12th                        -2.427e-01  4.229e-01   -0.574 0.566027    
## V4.1st.4th                             NA         NA       NA       NA    
## V4.5th.6th                             NA         NA       NA       NA    
## V4.7th.8th                             NA         NA       NA       NA    
## V4.9th                                 NA         NA       NA       NA    
## V4.Assoc.acdm                   4.819e-01  1.934e-01    2.491 0.012730 *  
## V4.Assoc.voc                    4.399e-01  1.318e-01    3.337 0.000846 ***
## V4.Bachelors                    1.112e+00  1.107e-01   10.048  < 2e-16 ***
## V4.Doctorate                           NA         NA       NA       NA    
## V4.HS.grad                     -2.108e-01  8.542e-02   -2.468 0.013604 *  
## V4.Masters                             NA         NA       NA       NA    
## V4.Preschool                           NA         NA       NA       NA    
## V4.Prof.school                         NA         NA       NA       NA    
## V4.Some.college                        NA         NA       NA       NA    
## V5                                     NA         NA       NA       NA    
## V6.Divorced                    -3.636e-01  5.024e-01   -0.724 0.469302    
## V6.Married.AF.spouse            2.500e+00  1.009e+00    2.478 0.013212 *  
## V6.Married.civ.spouse           1.809e+00  6.901e-01    2.621 0.008767 ** 
## V6.Married.spouse.absent       -1.916e-02  6.769e-01   -0.028 0.977419    
## V6.Never.married               -4.782e-01  5.110e-01   -0.936 0.349334    
## V6.Separated                   -6.317e-01  5.843e-01   -1.081 0.279634    
## V6.Widowed                             NA         NA       NA       NA    
## V7..                                   NA         NA       NA       NA    
## V7.Adm.clerical                 3.182e-01  1.760e-01    1.808 0.070658 .  
## V7.Armed.Forces                -1.233e+01  4.244e+02   -0.029 0.976824    
## V7.Craft.repair                 2.127e-01  1.511e-01    1.408 0.159250    
## V7.Exec.managerial              9.917e-01  1.593e-01    6.226 4.80e-10 ***
## V7.Farming.fishing             -8.439e-01  2.869e-01   -2.942 0.003263 ** 
## V7.Handlers.cleaners           -5.814e-01  2.561e-01   -2.270 0.023184 *  
## V7.Machine.op.inspct           -7.993e-02  1.823e-01   -0.439 0.660993    
## V7.Other.service               -3.570e-01  2.137e-01   -1.670 0.094864 .  
## V7.Priv.house.serv             -1.786e+01  3.461e+03   -0.005 0.995883    
## V7.Prof.specialty               8.173e-01  1.771e-01    4.614 3.96e-06 ***
## V7.Protective.serv              9.216e-01  2.343e-01    3.933 8.39e-05 ***
## V7.Sales                        6.051e-01  1.617e-01    3.743 0.000182 ***
## V7.Tech.support                 8.322e-01  2.095e-01    3.972 7.13e-05 ***
## V7.Transport.moving                    NA         NA       NA       NA    
## V8.Husband                     -1.545e+00  2.175e-01   -7.105 1.20e-12 ***
## V8.Not.in.family               -1.300e+00  5.183e-01   -2.508 0.012158 *  
## V8.Other.relative              -2.250e+00  5.009e-01   -4.492 7.06e-06 ***
## V8.Own.child                   -2.508e+00  4.923e-01   -5.093 3.52e-07 ***
## V8.Unmarried                   -1.627e+00  5.374e-01   -3.027 0.002473 ** 
## V8.Wife                                NA         NA       NA       NA    
## V9.Amer.Indian.Eskimo          -1.181e+00  5.091e-01   -2.319 0.020394 *  
## V9.Asian.Pac.Islander           4.669e-01  2.799e-01    1.668 0.095275 .  
## V9.Black                       -3.876e-01  1.459e-01   -2.657 0.007890 ** 
## V9.Other                       -1.463e+00  6.704e-01   -2.183 0.029036 *  
## V9.White                               NA         NA       NA       NA    
## V10.Female                     -8.349e-01  1.911e-01   -4.370 1.24e-05 ***
## V10.Male                               NA         NA       NA       NA    
## V11                             3.266e-04  2.052e-05   15.916  < 2e-16 ***
## V12                             5.521e-04  7.073e-05    7.805 5.93e-15 ***
## V13                             3.011e-02  3.387e-03    8.890  < 2e-16 ***
## V14..                          -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Cambodia                   -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Canada                     -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.China                      -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Columbia                   -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Cuba                       -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Dominican.Republic         -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Ecuador                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.El.Salvador                -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.England                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.France                     -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Germany                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Greece                     -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Guatemala                  -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Haiti                      -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Holand.Netherlands                 NA         NA       NA       NA    
## V14.Honduras                   -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Hong                       -4.510e+15  2.170e+13 -207.852  < 2e-16 ***
## V14.Hungary                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.India                      -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Iran                       -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Ireland                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Italy                      -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Jamaica                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Japan                      -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Laos                       -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Mexico                     -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Nicaragua                  -4.510e+15  2.170e+13 -207.852  < 2e-16 ***
## V14.Outlying.US.Guam.USVI.etc. -4.510e+15  2.170e+13 -207.852  < 2e-16 ***
## V14.Peru                       -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Philippines                -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Poland                     -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Portugal                   -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Puerto.Rico                -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Scotland                   -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.South                      -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Taiwan                     -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Thailand                   -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Trinadad.Tobago            -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.United.States              -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Vietnam                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Yugoslavia                 -6.537e+12  2.170e+13   -0.301 0.763212    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 10294.3  on 10037  degrees of freedom
## Residual deviance:  5791.2  on  9948  degrees of freedom
## AIC: 5971.2
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-4 model; 50 is vip's second positional
# argument (presumably the number of features shown). Title typo "Assited" fixed.
vip(Adult_TDA_KDE_5.50.5_n4_LrFit0, 50) + ggtitle("Adult_TDA_KDE_5.50.5_n4_Lr1Fit TDA-Assisted LR")

# Predict outcome using Adult_TDA_KDE_5.50.5_n4_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n4_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-4 predictions (pred0) against the test labels
ad_tda_kde_5.50.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n4_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6609   851
##      >50K     807  1501
##                                           
##                Accuracy : 0.8303          
##                  95% CI : (0.8227, 0.8377)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5328          
##                                           
##  Mcnemar's Test P-Value : 0.291           
##                                           
##             Sensitivity : 0.8912          
##             Specificity : 0.6382          
##          Pos Pred Value : 0.8859          
##          Neg Pred Value : 0.6503          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6766          
##    Detection Prevalence : 0.7637          
##       Balanced Accuracy : 0.7647          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n4_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6609   851
##      >50K     807  1501
##                                           
##                Accuracy : 0.8303          
##                  95% CI : (0.8227, 0.8377)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5328          
##                                           
##  Mcnemar's Test P-Value : 0.291           
##                                           
##             Sensitivity : 0.8912          
##             Specificity : 0.6382          
##          Pos Pred Value : 0.8859          
##          Neg Pred Value : 0.6503          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6766          
##    Detection Prevalence : 0.7637          
##       Balanced Accuracy : 0.7647          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n4_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.302621e-01   5.327644e-01   8.226681e-01   8.376586e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.475347e-65   2.909546e-01
# Overall test-set accuracy (first element of $overall) of the node-4 model.
ad_tda_kde_5.50.5_n4_lr_cf0_ov_acc <- ad_tda_kde_5.50.5_n4_lr_cf0$overall["Accuracy"]
# Per-class metrics of the same confusion matrix.
ad_tda_kde_5.50.5_n4_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8911812            0.6381803            0.8859249 
##       Neg Pred Value            Precision               Recall 
##            0.6503466            0.8859249            0.8911812 
##                   F1           Prevalence       Detection Rate 
##            0.8885453            0.7592138            0.6765971 
## Detection Prevalence    Balanced Accuracy 
##            0.7637183            0.7646808
# Keep Precision, Recall and F1 (elements 5:7 of byClass) for the node-4 model.
ad_tda_kde_5.50.5_n4_lr_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n4_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Per-fold accuracy difference: ad_lr_fit_re (presumably the non-TDA LR) minus the node-4 TDA-assisted LR.
diff_tda_kde_5.50.5_lr_n4_3_fold <- ad_lr_fit_re - ad_tda_kde_5.50.5_n4_lr_fit_re
diff_tda_kde_5.50.5_lr_n4_3_fold
##       Accuracy
## 1 -0.009603785
## 2 -0.005361278
## 3 -0.004544938
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# -0.01 / 0.01 are presumably the ROPE bounds (same interval as the rope plot) -- confirm against BayesianSignTest.
bst_tda_kde_5.50.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_lr.n4_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; evaluates to NaN (0/0) or Inf when probRight is 0.
bst_tda_kde_5.50.5_lr.n4_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n4_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n4_3_fold_odds.left
## [1] NaN
# Bayesian Signed Rank Test
# Same per-fold differences and (presumably ROPE) limits as the sign test.
bsr_tda_kde_5.50.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_lr.n4_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Second argument 0.1 is presumably the fold-correlation parameter -- confirm against correlatedBayesianTtest.
bct_tda_kde_5.50.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_lr.n4_3_fold
## $left
## [1] 0.09659779
## 
## $rope
## [1] 0.8974904
## 
## $right
## [1] 0.005911817
# Rope Plot: share of the fold differences inside [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.50.5_lr_n4_3_fold, c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_kde_5.50.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold))
#bf_tda_kde_5.50.5_lr.n4_3_fold

# One-sample t-test of the fold differences against a mean of 0
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold)
## t = -4.1474, df = 2, p-value = 0.05351
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.013250044  0.000243377
## sample estimates:
##    mean of x 
## -0.006503333
### Test set diff
# NOTE(review): the reference accuracy here is `svm_cf_ov_acc`, whereas the
# fold-wise comparison for this model uses the LR baseline (`ad_lr_fit_re`).
# Confirm that the SVM test accuracy is the intended baseline for an LR model.
diff_tda_kde_5.50.5_lr.n4_test <-
  svm_cf_ov_acc - ad_tda_kde_5.50.5_n4_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n4_test
##     Accuracy 
## -0.002457002
## Bayesian Tests Test set diff

# Bayesian Sign Test (applied to a single test-set difference)

bst_tda_kde_5.50.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n4_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# probLeft / probRight; both are 0 here, so the ratio is 0/0 = NaN.

bst_tda_kde_5.50.5_lr.n4_test_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n4_test, probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n4_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n4_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Returns NA for all three entries on this single difference (see output).

bct_tda_kde_5.50.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n4_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n4_test)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n4_test)) #bf_tda_pca_5.50.5_lr.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n4_test))

##Node5

# Logistic regression via caret (`glm` with a binomial family) fitted on
# `tda.m_kde_adult_5.50.5.n5.vec`, resampled according to `fitControl` and
# scored on accuracy. The recorded run emits non-convergence and
# rank-deficiency warnings for this fit.
Adult_TDA_KDE_5.50.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n5.vec,
  method = "glm",
  family = "binomial",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted caret object (resampling summary).
Adult_TDA_KDE_5.50.5_n5_LrFit0
## Generalized Linear Model 
## 
## 7540 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 5026, 5028, 5026 
## Resampling results:
## 
##   Accuracy   Kappa   
##   0.8661809  0.386747
# Per-fold accuracy and kappa.
Adult_TDA_KDE_5.50.5_n5_LrFit0[["resample"]]
##    Accuracy     Kappa Resample
## 1 0.8659507 0.3891890    Fold1
## 2 0.8682325 0.3875662    Fold2
## 3 0.8643596 0.3834859    Fold3
# Keep only the first column (Accuracy) as a one-column data frame for the
# fold-wise comparison below.
ad_tda_kde_5.50.5_n5_lr_fit_re <-
  Adult_TDA_KDE_5.50.5_n5_LrFit0[["resample"]][1]

# Coefficient table of the final model.
summary(Adult_TDA_KDE_5.50.5_n5_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (23 not defined because of singularities)
##                                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)                    -1.531e+13  1.949e+13   -0.786 0.432144    
## V1                              7.835e-02  6.899e-03   11.357  < 2e-16 ***
## V2..                            1.531e+13  1.949e+13    0.786 0.432144    
## V2.Federal.gov                  1.531e+13  1.949e+13    0.786 0.432144    
## V2.Local.gov                    1.531e+13  1.949e+13    0.786 0.432144    
## V2.Never.worked                -4.488e+15  1.949e+13 -230.295  < 2e-16 ***
## V2.Private                      1.531e+13  1.949e+13    0.786 0.432144    
## V2.Self.emp.inc                 1.531e+13  1.949e+13    0.786 0.432144    
## V2.Self.emp.not.inc             1.531e+13  1.949e+13    0.786 0.432144    
## V2.State.gov                    1.531e+13  1.949e+13    0.786 0.432144    
## V2.Without.pay                  1.531e+13  1.949e+13    0.786 0.432144    
## V3                              2.099e-06  9.540e-07    2.200 0.027778 *  
## V4.10th                                NA         NA       NA       NA    
## V4.11th                                NA         NA       NA       NA    
## V4.12th                                NA         NA       NA       NA    
## V4.1st.4th                             NA         NA       NA       NA    
## V4.5th.6th                             NA         NA       NA       NA    
## V4.7th.8th                             NA         NA       NA       NA    
## V4.9th                                 NA         NA       NA       NA    
## V4.Assoc.acdm                          NA         NA       NA       NA    
## V4.Assoc.voc                    4.025e-01  1.816e-01    2.216 0.026663 *  
## V4.Bachelors                           NA         NA       NA       NA    
## V4.Doctorate                           NA         NA       NA       NA    
## V4.HS.grad                     -3.119e-01  8.559e-02   -3.644 0.000268 ***
## V4.Masters                             NA         NA       NA       NA    
## V4.Preschool                           NA         NA       NA       NA    
## V4.Prof.school                         NA         NA       NA       NA    
## V4.Some.college                        NA         NA       NA       NA    
## V5                                     NA         NA       NA       NA    
## V6.Divorced                    -2.283e-01  1.066e+00   -0.214 0.830365    
## V6.Married.AF.spouse            2.486e+00  1.507e+00    1.650 0.098997 .  
## V6.Married.civ.spouse           2.141e+00  1.174e+00    1.825 0.068066 .  
## V6.Married.spouse.absent       -2.802e-01  1.305e+00   -0.215 0.829953    
## V6.Never.married               -5.760e-01  1.068e+00   -0.539 0.589690    
## V6.Separated                   -4.894e-01  1.139e+00   -0.430 0.667446    
## V6.Widowed                             NA         NA       NA       NA    
## V7..                                   NA         NA       NA       NA    
## V7.Adm.clerical                 3.736e-01  2.086e-01    1.791 0.073227 .  
## V7.Armed.Forces                -2.274e+01  1.525e+05    0.000 0.999881    
## V7.Craft.repair                 2.768e-01  1.663e-01    1.664 0.096174 .  
## V7.Exec.managerial              8.350e-01  1.840e-01    4.537 5.69e-06 ***
## V7.Farming.fishing             -3.688e-01  3.161e-01   -1.167 0.243340    
## V7.Handlers.cleaners           -4.164e-01  2.677e-01   -1.556 0.119827    
## V7.Machine.op.inspct            6.750e-02  1.994e-01    0.339 0.734898    
## V7.Other.service               -3.180e-01  2.459e-01   -1.293 0.196035    
## V7.Priv.house.serv             -2.393e+01  7.364e+04    0.000 0.999741    
## V7.Prof.specialty               1.189e+00  2.334e-01    5.093 3.52e-07 ***
## V7.Protective.serv              8.446e-01  2.740e-01    3.082 0.002056 ** 
## V7.Sales                        4.039e-01  1.859e-01    2.173 0.029765 *  
## V7.Tech.support                 1.113e+00  2.595e-01    4.291 1.78e-05 ***
## V7.Transport.moving                    NA         NA       NA       NA    
## V8.Husband                     -1.278e+00  3.035e-01   -4.212 2.53e-05 ***
## V8.Not.in.family               -7.738e-01  6.013e-01   -1.287 0.198168    
## V8.Other.relative              -1.731e+00  6.119e-01   -2.830 0.004662 ** 
## V8.Own.child                   -1.756e+00  5.661e-01   -3.101 0.001926 ** 
## V8.Unmarried                   -1.727e+00  6.715e-01   -2.573 0.010095 *  
## V8.Wife                                NA         NA       NA       NA    
## V9.Amer.Indian.Eskimo          -1.486e+00  5.768e-01   -2.576 0.009981 ** 
## V9.Asian.Pac.Islander           5.260e-01  4.353e-01    1.208 0.226894    
## V9.Black                       -5.663e-01  1.998e-01   -2.834 0.004591 ** 
## V9.Other                       -2.357e+00  1.110e+00   -2.124 0.033646 *  
## V9.White                               NA         NA       NA       NA    
## V10.Female                     -6.602e-01  2.631e-01   -2.509 0.012094 *  
## V10.Male                               NA         NA       NA       NA    
## V11                             3.697e-04  2.660e-05   13.899  < 2e-16 ***
## V12                             5.408e-04  8.660e-05    6.244 4.26e-10 ***
## V13                             2.854e-02  4.059e-03    7.032 2.04e-12 ***
## V14..                          -1.407e+00  1.435e+00   -0.980 0.326858    
## V14.Cambodia                   -2.607e+01  1.507e+05    0.000 0.999862    
## V14.Canada                     -1.840e+00  1.800e+00   -1.022 0.306601    
## V14.China                      -1.393e+00  1.707e+00   -0.816 0.414689    
## V14.Columbia                   -2.562e+01  8.344e+04    0.000 0.999755    
## V14.Cuba                        3.551e-01  1.558e+00    0.228 0.819681    
## V14.Dominican.Republic         -1.727e+01  1.399e+03   -0.012 0.990148    
## V14.Ecuador                    -1.909e-03  1.691e+00   -0.001 0.999099    
## V14.El.Salvador                -2.776e+00  1.783e+00   -1.557 0.119535    
## V14.England                    -6.973e-01  1.660e+00   -0.420 0.674469    
## V14.France                     -2.309e+01  1.625e+05    0.000 0.999887    
## V14.Germany                    -8.534e-01  1.495e+00   -0.571 0.568216    
## V14.Greece                     -1.636e+00  2.115e+00   -0.774 0.439051    
## V14.Guatemala                  -3.395e-01  1.813e+00   -0.187 0.851448    
## V14.Haiti                      -2.321e+01  1.092e+05    0.000 0.999830    
## V14.Holand.Netherlands                 NA         NA       NA       NA    
## V14.Honduras                   -2.470e+01  2.452e+05    0.000 0.999920    
## V14.Hong                       -2.668e+01  1.398e+05    0.000 0.999848    
## V14.Hungary                    -2.539e+01  3.120e+05    0.000 0.999935    
## V14.India                      -2.833e+00  2.209e+00   -1.282 0.199724    
## V14.Iran                        2.252e-01  1.758e+00    0.128 0.898083    
## V14.Ireland                    -1.216e+00  1.790e+00   -0.679 0.496897    
## V14.Italy                      -2.567e+00  1.866e+00   -1.376 0.168826    
## V14.Jamaica                    -1.321e-01  1.564e+00   -0.084 0.932709    
## V14.Japan                      -2.613e+01  8.646e+04    0.000 0.999759    
## V14.Laos                       -2.666e+01  1.732e+05    0.000 0.999877    
## V14.Mexico                     -1.881e+00  1.498e+00   -1.255 0.209357    
## V14.Nicaragua                  -2.527e+01  1.007e+05    0.000 0.999800    
## V14.Outlying.US.Guam.USVI.etc. -2.399e+01  1.271e+05    0.000 0.999849    
## V14.Peru                       -2.576e+01  1.247e+05    0.000 0.999835    
## V14.Philippines                -7.225e-01  1.581e+00   -0.457 0.647625    
## V14.Poland                     -1.867e+00  1.771e+00   -1.054 0.291869    
## V14.Portugal                   -6.629e-01  1.856e+00   -0.357 0.720983    
## V14.Puerto.Rico                -2.366e+00  1.781e+00   -1.329 0.183994    
## V14.Scotland                   -2.715e+01  2.557e+05    0.000 0.999915    
## V14.South                      -1.428e+00  1.710e+00   -0.835 0.403544    
## V14.Taiwan                     -2.466e+00  1.940e+00   -1.271 0.203759    
## V14.Thailand                   -2.201e+00  2.037e+00   -1.080 0.280071    
## V14.Trinadad.Tobago            -2.743e+01  2.557e+05    0.000 0.999914    
## V14.United.States              -1.026e+00  1.386e+00   -0.740 0.459267    
## V14.Vietnam                    -2.153e+00  1.842e+00   -1.169 0.242491    
## V14.Yugoslavia                         NA         NA       NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6474.2  on 7539  degrees of freedom
## Residual deviance: 4003.6  on 7454  degrees of freedom
## AIC: 4175.6
## 
## Number of Fisher Scoring iterations: 25
# Variable-importance plot of the 50 top-ranked features of the node-5 model.
# The title now names the plotted object (`LrFit0`) and spells "Assisted"
# correctly.
vip(Adult_TDA_KDE_5.50.5_n5_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n5_LrFit0 TDA-Assisted LR")

# Predict outcome using Adult_TDA_KDE_5.50.5_n5_LrFit0 from training data based on testing data
# NOTE: `pred0` is overwritten by every model section that uses this name.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n5_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n5_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n5_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6503   882
##      >50K     913  1470
##                                           
##                Accuracy : 0.8162          
##                  95% CI : (0.8084, 0.8239)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.4996          
##                                           
##  Mcnemar's Test P-Value : 0.4789          
##                                           
##             Sensitivity : 0.8769          
##             Specificity : 0.6250          
##          Pos Pred Value : 0.8806          
##          Neg Pred Value : 0.6169          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6657          
##    Detection Prevalence : 0.7560          
##       Balanced Accuracy : 0.7509          
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the node-5 LR confusion matrix a second time (same object as above).
ad_tda_kde_5.50.5_n5_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6503   882
##      >50K     913  1470
##                                           
##                Accuracy : 0.8162          
##                  95% CI : (0.8084, 0.8239)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.4996          
##                                           
##  Mcnemar's Test P-Value : 0.4789          
##                                           
##             Sensitivity : 0.8769          
##             Specificity : 0.6250          
##          Pos Pred Value : 0.8806          
##          Neg Pred Value : 0.6169          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6657          
##    Detection Prevalence : 0.7560          
##       Balanced Accuracy : 0.7509          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-5 LR confusion matrix.
ad_tda_kde_5.50.5_n5_lr_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.162367e-01   4.996394e-01   8.084112e-01   8.238731e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   2.955418e-42   4.788888e-01
# First entry of `overall` is Accuracy; keep it for the test-set comparison.
ad_tda_kde_5.50.5_n5_lr_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n5_lr_cf0[["overall"]][1]

# Per-class statistics.
ad_tda_kde_5.50.5_n5_lr_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8768878            0.6250000            0.8805687 
##       Neg Pred Value            Precision               Recall 
##            0.6168695            0.8805687            0.8768878 
##                   F1           Prevalence       Detection Rate 
##            0.8787244            0.7592138            0.6657453 
## Detection Prevalence    Balanced Accuracy 
##            0.7560401            0.7509439
# Entries 5:7 of `byClass` are Precision, Recall and F1.
ad_tda_kde_5.50.5_n5_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n5_lr_cf0[["byClass"]][5:7]

###### Conduct initial Bayesian tests of non-TDA-assisted LR vs. TDA-assisted LR classifiers

### 3-fold diff

# Fold-wise accuracy difference: `ad_lr_fit_re` minus the node-5 TDA-KDE LR
# resample accuracies. Negative values mean the second operand scored higher.
diff_tda_kde_5.50.5_lr_n5_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.50.5_n5_lr_fit_re
diff_tda_kde_5.50.5_lr_n5_3_fold
##      Accuracy
## 1 -0.01482523
## 2 -0.01879842
## 3 -0.01479391
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# (-0.01 and 0.01 are presumably the ROPE bounds -- confirm against the
# definition of BayesianSignTest)

bst_tda_kde_5.50.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# probLeft / probRight; probRight is 0 here, so the ratio is Inf.

bst_tda_kde_5.50.5_lr.n5_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n5_3_fold, probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n5_3_fold
## $winLeft
## [1] 0.8329
## 
## $winRope
## [1] 0.1671
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# (second argument 0.1 is presumably the fold correlation -- confirm)

bct_tda_kde_5.50.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n5_3_fold
## $left
## [1] 0.9713858
## 
## $rope
## [1] 0.02689802
## 
## $right
## [1] 0.001716158
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_lr_n5_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold))
#bf_tda_kde_5.50.5_lr.n5_3_fold

#t_test
# One-sample t-test of the three fold differences against a mean of 0.
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold)
## t = -12.138, df = 2, p-value = 0.006719
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.02186020 -0.01041818
## sample estimates:
##   mean of x 
## -0.01613919
### Test set diff
# NOTE(review): the reference accuracy here is `svm_cf_ov_acc`, whereas the
# fold-wise comparison for this model uses the LR baseline (`ad_lr_fit_re`).
# Confirm that the SVM test accuracy is the intended baseline for an LR model.
diff_tda_kde_5.50.5_lr.n5_test <-
  svm_cf_ov_acc - ad_tda_kde_5.50.5_n5_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n5_test
##   Accuracy 
## 0.01156839
## Bayesian Tests Test set diff

# Bayesian Sign Test (applied to a single test-set difference)

bst_tda_kde_5.50.5_lr.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n5_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# probLeft / probRight; probLeft is 0 here, so the ratio is 0.

bst_tda_kde_5.50.5_lr.n5_test_odds.left <- with(
  bst_tda_kde_5.50.5_lr.n5_test, probLeft / probRight
)
bst_tda_kde_5.50.5_lr.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_lr.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n5_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4549333
## 
## $winRight
## [1] 0.5450667
# Bayesian Correlated Test
# Returns NA for all three entries on this single difference (see output).

bct_tda_kde_5.50.5_lr.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n5_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n5_test)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n5_test)) #bf_tda_pca_5.50.5_lr.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n5_test))


#naiveBayes
# Naive Bayes (caret method "nb") on the full one-hot training set, resampled
# according to `fitControl` and scored on accuracy. In the recorded run the
# `usekernel = FALSE` candidate fails on every fold with a zero-variance error,
# so only the kernel variant yields resampled results.
adultNbFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V14.Guatemala, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hungary, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Outlying.US.Guam.USVI.etc.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted caret object (resampling summary across tuning parameters).
adultNbFit
## Naive Bayes 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15195, 15196, 15195 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa     
##   FALSE            NaN         NaN
##    TRUE      0.7606282  0.00909498
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold accuracy and kappa of the selected model.
adultNbFit[["resample"]]
##    Accuracy       Kappa Resample
## 1 0.7603317 0.007448036    Fold1
## 2 0.7601685 0.005800114    Fold2
## 3 0.7613846 0.014036789    Fold3
# Keep only the first column (Accuracy) as a one-column data frame; this is the
# baseline for the fold-wise NB comparisons.
ad_nb_fit_re <- adultNbFit[["resample"]][1]

# Structure of the final model object.
summary(adultNbFit)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
#varImp (adultNbFit)



# Predict outcome using model from training data based on testing data
predictions <- predict(
  adultNbFit,
  newdata = adult.one_hot_df4Test
)

# Create confusion matrix to assess model fit/performance on test data
nb_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
nb_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2194
##      >50K       0   158
##                                          
##                Accuracy : 0.7754         
##                  95% CI : (0.767, 0.7836)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 8.684e-05      
##                                          
##                   Kappa : 0.0986         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 1.00000        
##             Specificity : 0.06718        
##          Pos Pred Value : 0.77170        
##          Neg Pred Value : 1.00000        
##              Prevalence : 0.75921        
##          Detection Rate : 0.75921        
##    Detection Prevalence : 0.98382        
##       Balanced Accuracy : 0.53359        
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall statistics of the baseline NB confusion matrix.
nb_cf[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.753890e-01   9.857036e-02   7.669804e-01   7.836332e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   8.684169e-05   0.000000e+00
# First entry of `overall` is Accuracy; keep it as the NB test-set baseline.
nb_cf_ov_acc <- nb_cf[["overall"]][1]

# Per-class statistics.
nb_cf[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##           1.00000000           0.06717687           0.77169615 
##       Neg Pred Value            Precision               Recall 
##           1.00000000           0.77169615           1.00000000 
##                   F1           Prevalence       Detection Rate 
##           0.87113826           0.75921376           0.75921376 
## Detection Prevalence    Balanced Accuracy 
##           0.98382473           0.53358844
# Entries 5:7 of `byClass` are Precision, Recall and F1.
nb_cf_pre_rec_f1 <- nb_cf[["byClass"]][5:7]


##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node1

# Naive Bayes (caret method "nb") fitted on `tda.m_adult_5.50.5.n1.vec`,
# resampled according to `fitControl` and scored on accuracy. As with the
# baseline NB fit, the recorded run shows the `usekernel = FALSE` candidate
# failing on every fold with a zero-variance error.
Adult_TDA_PC_5.50.5_n1_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n1.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.., V14.Cambodia, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.Cambodia, V14.Canada, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.Cambodia, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted caret object (resampling summary across tuning parameters).
Adult_TDA_PC_5.50.5_n1_NbFit0
## Naive Bayes 
## 
## 4917 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 3278, 3278, 3278 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9733577    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# Per-fold accuracy and kappa; kappa is 0 on every fold.
Adult_TDA_PC_5.50.5_n1_NbFit0[["resample"]]
##    Accuracy Kappa Resample
## 1 0.9737645     0    Fold1
## 2 0.9731544     0    Fold2
## 3 0.9731544     0    Fold3
# Keep only the first column (Accuracy) as a one-column data frame for the
# fold-wise comparison below.
ad_tda_pc_5.50.5_n1_nb_fit_re <-
  Adult_TDA_PC_5.50.5_n1_NbFit0[["resample"]][1]

# Structure of the final model object.
summary(Adult_TDA_PC_5.50.5_n1_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_PC_5.50.5_n1_NbFit0 from training data based on testing data
# NOTE: `pred0` is overwritten by every model section that uses this name.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n1_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n1_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n1_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Print the node-1 NB confusion matrix a second time (same object as above).
ad_tda_pc_5.50.5_n1_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics of the node-1 TDA-PCA NB confusion matrix.
ad_tda_pc_5.50.5_n1_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.2407862      0.0000000      0.2323343      0.2493929      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# First entry of `overall` is Accuracy; keep it for the test-set comparison.
ad_tda_pc_5.50.5_n1_nb_cf0_ov_acc <- ad_tda_pc_5.50.5_n1_nb_cf0$overall[1]

# Per-class statistics. The element is `byClass`; the earlier spelling
# `byClas1` matched nothing and printed NULL, so the previously recorded output
# was dropped here -- re-knit to regenerate it.
# The confusion matrix above reports Pos Pred Value as NaN (no ' <=50K'
# predictions), so expect NaN among the entries stored below.
ad_tda_pc_5.50.5_n1_nb_cf0$byClass
# Entries 5:7 of `byClass` are Precision, Recall and F1.
ad_tda_pc_5.50.5_n1_nb_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n1_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-TDA-assisted NB vs. TDA-assisted NB classifiers

### 3-fold diff

# Fold-wise accuracy difference: `ad_nb_fit_re` (baseline NB resamples) minus
# the node-1 TDA-PCA NB resample accuracies. Negative values mean the second
# operand scored higher.
diff_tda_pca_5.50.5_nb_n1_3_fold <-
  ad_nb_fit_re - ad_tda_pc_5.50.5_n1_nb_fit_re
diff_tda_pca_5.50.5_nb_n1_3_fold
##     Accuracy
## 1 -0.2134328
## 2 -0.2129859
## 3 -0.2117698
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# (-0.01 and 0.01 are presumably the ROPE bounds -- confirm against the
# definition of BayesianSignTest)

bst_tda_pca_5.50.5_nb.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold), -0.01, 0.01
)
bst_tda_pca_5.50.5_nb.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# probLeft / probRight; probRight is 0 here, so the ratio is Inf.

bst_tda_pca_5.50.5_nb.n1_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_nb.n1_3_fold, probLeft / probRight
)
bst_tda_pca_5.50.5_nb.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.50.5_nb.n1_3_fold
## $winLeft
## [1] 0.9911667
## 
## $winRope
## [1] 0.008833333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# (second argument 0.1 is presumably the fold correlation -- confirm)

bct_tda_pca_5.50.5_nb.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold), 0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_nb.n1_3_fold
## $left
## [1] 0.999996
## 
## $rope
## [1] 6.869588e-07
## 
## $right
## [1] 3.318044e-06
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n1_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold))
#bf_tda_pca_5.50.5_nb.n1_3_fold

#t_test
# One-sample t-test of the three fold differences against a mean of 0.
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold)
## t = -428.12, df = 2, p-value = 5.456e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2148675 -0.2105915
## sample estimates:
##  mean of x 
## -0.2127295
### Test set diff
diff_tda_pca_5.50.5_nb.n1_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n1_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n1_test
##  Accuracy 
## 0.5346028
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n1_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n1_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n1_test_odds.left<-bst_tda_pca_5.50.5_nb.n1_test$probLeft/bst_tda_pca_5.50.5_nb.n1_test$probRight
bst_tda_pca_5.50.5_nb.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n1_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n1_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1592667
## 
## $winRight
## [1] 0.8407333
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n1_test))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n1_test)) #bf_tda_pca_5.50.5_nb.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n1_test))

##Node2

# Fit a naive Bayes classifier on the Node2 training subset, resampled as
# configured in fitControl and scored on Accuracy.
# The tuning grid is pinned to the kernel-density variant: caret's default grid
# also tries usekernel = FALSE, which aborts in every fold with "Zero variances
# for at least one class" and only contributes NaN rows to the resampling
# summary. fL = 0 and adjust = 1 are the values the default grid held constant,
# so the selected model is the same one as before.
Adult_TDA_PC_5.50.5_n2_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n2.vec,
                method = 'nb',
                trControl = fitControl,
                tuneGrid = expand.grid(fL = 0, usekernel = TRUE, adjust = 1),
                metric = 'Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n2_NbFit0
## Naive Bayes 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8136, 8138, 8138 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa    
##   FALSE            NaN        NaN
##    TRUE      0.5471151  0.1056197
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_PC_5.50.5_n2_NbFit0$resample
##    Accuracy       Kappa Resample
## 1 0.5039312 0.081902576    Fold1
## 2 0.5919371 0.233191922    Fold2
## 3 0.5454769 0.001764559    Fold3
ad_tda_pc_5.50.5_n2_nb_fit_re<-Adult_TDA_PC_5.50.5_n2_NbFit0$resample[1]

summary(Adult_TDA_PC_5.50.5_n2_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_PC_5.50.5_n2_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n2_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n2_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n2_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3598  1573
##      >50K    3818   779
##                                          
##                Accuracy : 0.4481         
##                  95% CI : (0.4382, 0.458)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : -0.1385        
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.4852         
##             Specificity : 0.3312         
##          Pos Pred Value : 0.6958         
##          Neg Pred Value : 0.1695         
##              Prevalence : 0.7592         
##          Detection Rate : 0.3683         
##    Detection Prevalence : 0.5294         
##       Balanced Accuracy : 0.4082         
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_pc_5.50.5_n2_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3598  1573
##      >50K    3818   779
##                                          
##                Accuracy : 0.4481         
##                  95% CI : (0.4382, 0.458)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : -0.1385        
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.4852         
##             Specificity : 0.3312         
##          Pos Pred Value : 0.6958         
##          Neg Pred Value : 0.1695         
##              Prevalence : 0.7592         
##          Detection Rate : 0.3683         
##    Detection Prevalence : 0.5294         
##       Balanced Accuracy : 0.4082         
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_pc_5.50.5_n2_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   4.480958e-01  -1.384920e-01   4.381994e-01   4.580232e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  3.863576e-205
ad_tda_pc_5.50.5_n2_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n2_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n2_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.4851672            0.3312075            0.6958035 
##       Neg Pred Value            Precision               Recall 
##            0.1694583            0.6958035            0.4851672 
##                   F1           Prevalence       Detection Rate 
##            0.5717010            0.7592138            0.3683456 
## Detection Prevalence    Balanced Accuracy 
##            0.5293817            0.4081873
ad_tda_pc_5.50.5_n2_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n2_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

diff_tda_pca_5.50.5_nb_n2_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n2_nb_fit_re)
diff_tda_pca_5.50.5_nb_n2_3_fold
##    Accuracy
## 1 0.2564005
## 2 0.1682314
## 3 0.2159077
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n2_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n2_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n2_3_fold$probRight
bst_tda_pca_5.50.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.008766667
## 
## $winRight
## [1] 0.9912333
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n2_3_fold
## $left
## [1] 0.008445021
## 
## $rope
## [1] 0.001688794
## 
## $right
## [1] 0.9898662
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold))
#bf_tda_pca_5.50.5_nb.n2_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold)
## t = 8.3795, df = 2, p-value = 0.01394
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1038801 0.3231463
## sample estimates:
## mean of x 
## 0.2135132
### Test set diff
diff_tda_pca_5.50.5_nb.n2_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n2_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n2_test
##  Accuracy 
## 0.3272932
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n2_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n2_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n2_test_odds.left<-bst_tda_pca_5.50.5_nb.n2_test$probLeft/bst_tda_pca_5.50.5_nb.n2_test$probRight
bst_tda_pca_5.50.5_nb.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n2_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n2_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1624
## 
## $winRight
## [1] 0.8376
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n2_test))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n2_test)) #bf_tda_pca_5.50.5_nb.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n2_test))

##Node3

# Fit a naive Bayes classifier on the Node3 training subset, resampled as
# configured in fitControl and scored on Accuracy.
# The tuning grid is pinned to the kernel-density variant: caret's default grid
# also tries usekernel = FALSE, which aborts in every fold with "Zero variances
# for at least one class" and only contributes NaN rows to the resampling
# summary. fL = 0 and adjust = 1 are the values the default grid held constant,
# so the selected model is the same one as before.
Adult_TDA_PC_5.50.5_n3_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n3.vec,
                method = 'nb',
                trControl = fitControl,
                tuneGrid = expand.grid(fL = 0, usekernel = TRUE, adjust = 1),
                metric = 'Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Greece, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal, V14.Thailand
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Columbia, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n3_NbFit0
## Naive Bayes 
## 
## 13240 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8827, 8827, 8826 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa      
##   FALSE            NaN          NaN
##    TRUE      0.7719034  0.003051974
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_PC_5.50.5_n3_NbFit0$resample
##    Accuracy       Kappa Resample
## 1 0.7729436 0.009155921    Fold1
## 2 0.7713574 0.000000000    Fold2
## 3 0.7714092 0.000000000    Fold3
ad_tda_pc_5.50.5_n3_nb_fit_re<-Adult_TDA_PC_5.50.5_n3_NbFit0$resample[1]

summary(Adult_TDA_PC_5.50.5_n3_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_PC_5.50.5_n3_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n3_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n3_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n3_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2339
##      >50K       0    13
##                                         
##                Accuracy : 0.7605        
##                  95% CI : (0.752, 0.769)
##     No Information Rate : 0.7592        
##     P-Value [Acc > NIR] : 0.3844        
##                                         
##                   Kappa : 0.0084        
##                                         
##  Mcnemar's Test P-Value : <2e-16        
##                                         
##             Sensitivity : 1.000000      
##             Specificity : 0.005527      
##          Pos Pred Value : 0.760226      
##          Neg Pred Value : 1.000000      
##              Prevalence : 0.759214      
##          Detection Rate : 0.759214      
##    Detection Prevalence : 0.998669      
##       Balanced Accuracy : 0.502764      
##                                         
##        'Positive' Class :  <=50K        
## 
ad_tda_pc_5.50.5_n3_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2339
##      >50K       0    13
##                                         
##                Accuracy : 0.7605        
##                  95% CI : (0.752, 0.769)
##     No Information Rate : 0.7592        
##     P-Value [Acc > NIR] : 0.3844        
##                                         
##                   Kappa : 0.0084        
##                                         
##  Mcnemar's Test P-Value : <2e-16        
##                                         
##             Sensitivity : 1.000000      
##             Specificity : 0.005527      
##          Pos Pred Value : 0.760226      
##          Neg Pred Value : 1.000000      
##              Prevalence : 0.759214      
##          Detection Rate : 0.759214      
##    Detection Prevalence : 0.998669      
##       Balanced Accuracy : 0.502764      
##                                         
##        'Positive' Class :  <=50K        
## 
ad_tda_pc_5.50.5_n3_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.760544636    0.008368689    0.751953582    0.768980076    0.759213759 
## AccuracyPValue  McnemarPValue 
##    0.384402497    0.000000000
ad_tda_pc_5.50.5_n3_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n3_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n3_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##          1.000000000          0.005527211          0.760225525 
##       Neg Pred Value            Precision               Recall 
##          1.000000000          0.760225525          1.000000000 
##                   F1           Prevalence       Detection Rate 
##          0.863781958          0.759213759          0.759213759 
## Detection Prevalence    Balanced Accuracy 
##          0.998669124          0.502763605
ad_tda_pc_5.50.5_n3_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n3_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

diff_tda_pca_5.50.5_nb_n3_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n3_nb_fit_re)
diff_tda_pca_5.50.5_nb_n3_3_fold
##      Accuracy
## 1 -0.01261191
## 2 -0.01118887
## 3 -0.01002458
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n3_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n3_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n3_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n3_3_fold$probRight
bst_tda_pca_5.50.5_nb.n3_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n3_3_fold
## $winLeft
## [1] 0.8343333
## 
## $winRope
## [1] 0.1656667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n3_3_fold
## $left
## [1] 0.8610332
## 
## $rope
## [1] 0.1381444
## 
## $right
## [1] 0.0008223578
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold))
#bf_tda_pca_5.50.5_nb.n3_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold)
## t = -15.071, df = 2, p-value = 0.004374
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.014494114 -0.008056121
## sample estimates:
##   mean of x 
## -0.01127512
### Test set diff
diff_tda_pca_5.50.5_nb.n3_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n3_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n3_test
##   Accuracy 
## 0.01484439
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n3_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n3_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n3_test_odds.left<-bst_tda_pca_5.50.5_nb.n3_test$probLeft/bst_tda_pca_5.50.5_nb.n3_test$probRight
bst_tda_pca_5.50.5_nb.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Run on the Node3 test-set difference and store the result under the n3 name,
# in line with the sign test and correlated t-test of this Node3 section, so the
# Node2 result object is not overwritten.
bsr_tda_pca_5.50.5_nb.n3_test <- BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n3_test), -0.01, 0.01)
bsr_tda_pca_5.50.5_nb.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1605333
## 
## $winRight
## [1] 0.8394667
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n3_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n3_test))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n3_test)) #bf_tda_pca_5.50.5_nb.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n3_test))

##Node4

# Fit a naive Bayes classifier on the Node4 training subset, resampled as
# configured in fitControl and scored on Accuracy.
# The tuning grid is pinned to the kernel-density variant: caret's default grid
# also tries usekernel = FALSE, which aborts in every fold with "Zero variances
# for at least one class" and only contributes NaN rows to the resampling
# summary. fL = 0 and adjust = 1 are the values the default grid held constant,
# so the selected model is the same one as before.
Adult_TDA_PC_5.50.5_n4_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n4.vec,
                method = 'nb',
                trControl = fitControl,
                tuneGrid = expand.grid(fL = 0, usekernel = TRUE, adjust = 1),
                metric = 'Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V8.Husband, V14.Cambodia, V14.Columbia, V14.Cuba, V14.Ecuador, V14.El.Salvador, V14.France, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Greece, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n4_NbFit0
## Naive Bayes 
## 
## 16700 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11133, 11134, 11133 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9449102    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_PC_5.50.5_n4_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9448536     0    Fold1
## 2 0.9450234     0    Fold2
## 3 0.9448536     0    Fold3
ad_tda_pc_5.50.5_n4_nb_fit_re<-Adult_TDA_PC_5.50.5_n4_NbFit0$resample[1]

summary(Adult_TDA_PC_5.50.5_n4_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_PC_5.50.5_n4_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n4_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n4_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n4_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n4_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n4_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
ad_tda_pc_5.50.5_n4_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n4_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n4_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
ad_tda_pc_5.50.5_n4_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n4_nb_cf0$byClass[5:7]


###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

diff_tda_pca_5.50.5_nb_n4_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n4_nb_fit_re)
diff_tda_pca_5.50.5_nb_n4_3_fold
##     Accuracy
## 1 -0.1845219
## 2 -0.1848549
## 3 -0.1834690
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n4_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n4_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n4_3_fold$probRight
bst_tda_pca_5.50.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n4_3_fold
## $winLeft
## [1] 0.9914
## 
## $winRope
## [1] 0.0086
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n4_3_fold
## $left
## [1] 0.9999962
## 
## $rope
## [1] 7.47709e-07
## 
## $right
## [1] 3.081056e-06
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold))
#bf_tda_pca_5.50.5_nb.n4_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold)
## t = -441.22, df = 2, p-value = 5.137e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1860790 -0.1824849
## sample estimates:
##  mean of x 
## -0.1842819
### Test set diff
diff_tda_pca_5.50.5_nb.n4_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n4_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n4_test
##   Accuracy 
## 0.01617527
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n4_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n4_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n4_test_odds.left<-bst_tda_pca_5.50.5_nb.n4_test$probLeft/bst_tda_pca_5.50.5_nb.n4_test$probRight
bst_tda_pca_5.50.5_nb.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n4_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n4_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4590333
## 
## $winRight
## [1] 0.5409667
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n4_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n4_test))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n4_test)) #bf_tda_pca_5.50.5_nb.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n4_test))

##Node5

# Fit a naive Bayes classifier (method 'nb') on tda.m_adult_5.50.5.n5.vec
# (presumably the Mapper node-5 subset -- confirm upstream), resampled with
# the shared `fitControl` and selected on accuracy.
Adult_TDA_PC_5.50.5_n5_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  method = 'nb',
  trControl = fitControl,
  metric = 'Accuracy'
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Prof.specialty, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Local.gov, V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Separated, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n5_NbFit0
## Naive Bayes 
## 
## 14404 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9603, 9602, 9603 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9979867    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_PC_5.50.5_n5_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9981254     0    Fold1
## 2 0.9979175     0    Fold2
## 3 0.9979171     0    Fold3
# Keep the per-fold accuracy as a one-column data frame; Accuracy is the
# first column of $resample, so this matches selecting column 1.
ad_tda_pc_5.50.5_n5_nb_fit_re <-
  Adult_TDA_PC_5.50.5_n5_NbFit0$resample["Accuracy"]

summary(Adult_TDA_PC_5.50.5_n5_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the held-out test rows with the node-5 naive Bayes model.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n5_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Confusion matrix of the predictions against the true test labels.
ad_tda_pc_5.50.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n5_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n5_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
ad_tda_pc_5.50.5_n5_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n5_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n5_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision, Recall and F1 are entries 5 to 7 of $byClass; select them by
# name so the intent is visible.
ad_tda_pc_5.50.5_n5_nb_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n5_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted naive Bayes (NB) vs. tda-assisted NB classifiers

### 3-fold diff

# Per-fold accuracy of the baseline NB fit minus the node-5 TDA/PCA NB fit;
# negative values mean the node-5 model scored higher in cross-validation.
diff_tda_pca_5.50.5_nb_n5_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n5_nb_fit_re)
diff_tda_pca_5.50.5_nb_n5_3_fold
##     Accuracy
## 1 -0.2377937
## 2 -0.2377490
## 3 -0.2365325
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# Node-5 3-fold accuracy differences with bounds -0.01 and 0.01.

bst_tda_pca_5.50.5_nb.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; evaluates to Inf when probRight is 0.

bst_tda_pca_5.50.5_nb.n5_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_nb.n5_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test (node 5, 3-fold differences)
# Uses the node-5 differences. This call previously passed the node-4 vector
# (diff_tda_pca_5.50.5_nb_n4_3_fold), a copy-paste slip, so any printed
# result that predates this fix must be regenerated by re-knitting.

bsr_tda_pca_5.50.5_nb.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nb.n5_3_fold
## $winLeft
## [1] 0.9904667
## 
## $winRope
## [1] 0.009533333
## 
## $winRight
## [1] 0
# Bayesian correlated t-test on the node-5 3-fold accuracy differences.
# Positional arguments after the data: 0.1 (presumably the assumed
# correlation between folds -- confirm against the helper's definition),
# then the lower and upper bounds -0.01 and 0.01.

bct_tda_pca_5.50.5_nb.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nb.n5_3_fold
## $left
## [1] 0.9999978
## 
## $rope
## [1] 3.415999e-07
## 
## $right
## [1] 1.859851e-06
# Rope Plot
# ROPE summary of the node-5 3-fold differences over [-0.01, 0.01], plotted.
diff_tda_pca_5.50.5_nb_n5_3_fold |>
  rope(c(-0.01, 0.01)) |>
  plot()

#BayesFactor
#bf_tda_pca_5.50.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold))
#bf_tda_pca_5.50.5_nb.n5_3_fold

#t_test
# One-sample t-test of the fold differences against a mean of 0.
t.test(
  as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold)
)
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold)
## t = -574.5, df = 2, p-value = 3.03e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2391361 -0.2355808
## sample estimates:
##  mean of x 
## -0.2373584
### Test set diff
# Baseline NB test accuracy minus the node-5 TDA/PCA NB test accuracy.
diff_tda_pca_5.50.5_nb.n5_test <-
  nb_cf_ov_acc - ad_tda_pc_5.50.5_n5_nb_cf0_ov_acc
diff_tda_pca_5.50.5_nb.n5_test
##   Accuracy 
## 0.01617527
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n5_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n5_test_odds.left<-bst_tda_pca_5.50.5_nb.n5_test$probLeft/bst_tda_pca_5.50.5_nb.n5_test$probRight
bst_tda_pca_5.50.5_nb.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n5_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4539333
## 
## $winRight
## [1] 0.5460667
# Bayesian Correlated Test
# NOTE(review): the input is the single test-set accuracy difference, and the
# printed left/rope/right values are all NA -- presumably because no spread
# can be estimated from one observation (confirm against the definition of
# correlatedBayesianTtest). Treat this result as uninformative.

bct_tda_pca_5.50.5_nb.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n5_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n5_test)) #bf_tda_pca_5.50.5_nb.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n5_test))

##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node1

# Fit a naive Bayes classifier (method 'nb') on the KDE-filter node-1 data,
# resampled with the shared `fitControl` and selected on accuracy.
Adult_TDA_KDE_5.50.5_n1_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  method = 'nb',
  trControl = fitControl,
  metric = 'Accuracy'
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Ecuador, V14.Holand.Netherlands, V14.Ireland, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Portugal, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.50.5_n1_NbFit0
## Naive Bayes 
## 
## 13387 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8925, 8924, 8925 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa     
##   FALSE            NaN         NaN
##    TRUE      0.7465455  0.03200911
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_KDE_5.50.5_n1_NbFit0$resample
##    Accuracy       Kappa Resample
## 1 0.7563873 0.087090012    Fold1
## 2 0.7423258 0.008937319    Fold2
## 3 0.7409234 0.000000000    Fold3
ad_tda_kde_5.50.5_n1_nb_fit_re<-Adult_TDA_KDE_5.50.5_n1_NbFit0$resample[1]

summary(Adult_TDA_KDE_5.50.5_n1_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_KDE_5.50.5_n1_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n1_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n1_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n1_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2190
##      >50K       0   162
##                                          
##                Accuracy : 0.7758         
##                  95% CI : (0.7674, 0.784)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 5.889e-05      
##                                          
##                   Kappa : 0.101          
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 1.00000        
##             Specificity : 0.06888        
##          Pos Pred Value : 0.77202        
##          Neg Pred Value : 1.00000        
##              Prevalence : 0.75921        
##          Detection Rate : 0.75921        
##    Detection Prevalence : 0.98342        
##       Balanced Accuracy : 0.53444        
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_kde_5.50.5_n1_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2190
##      >50K       0   162
##                                          
##                Accuracy : 0.7758         
##                  95% CI : (0.7674, 0.784)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 5.889e-05      
##                                          
##                   Kappa : 0.101          
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 1.00000        
##             Specificity : 0.06888        
##          Pos Pred Value : 0.77202        
##          Neg Pred Value : 1.00000        
##              Prevalence : 0.75921        
##          Detection Rate : 0.75921        
##    Detection Prevalence : 0.98342        
##       Balanced Accuracy : 0.53444        
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_kde_5.50.5_n1_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.757985e-01   1.009798e-01   7.673951e-01   7.840372e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   5.888906e-05   0.000000e+00
ad_tda_kde_5.50.5_n1_nb_cf0_ov_acc<-ad_tda_kde_5.50.5_n1_nb_cf0$overall[1]
# Per-class metrics for the KDE node-1 confusion matrix. This line used to
# read `$byClas1`, a typo that names no element and therefore printed NULL;
# re-knit to regenerate the metrics table.
ad_tda_kde_5.50.5_n1_nb_cf0$byClass
ad_tda_kde_5.50.5_n1_nb_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n1_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted naive Bayes (NB) vs. tda-assisted NB classifiers

### 3-fold diff

# Per-fold accuracy of the baseline NB fit minus the KDE node-1 NB fit;
# positive values mean the baseline scored higher in cross-validation.
diff_tda_kde_5.50.5_nb_n1_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n1_nb_fit_re)
diff_tda_kde_5.50.5_nb_n1_3_fold
##      Accuracy
## 1 0.003944396
## 2 0.017842698
## 3 0.020461222
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n1_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n1_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n1_3_fold$probRight
bst_tda_kde_5.50.5_nb.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.2127
## 
## $winRight
## [1] 0.7873
# Bayesian Correlated Test

bct_tda_kde_5.50.5_nb.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n1_3_fold
## $left
## [1] 0.02770818
## 
## $rope
## [1] 0.2530757
## 
## $right
## [1] 0.7192162
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_nb_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold))
#bf_tda_kde_5.50.5_nb.n1_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold)
## t = 2.7477, df = 2, p-value = 0.1109
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.007969344  0.036134888
## sample estimates:
##  mean of x 
## 0.01408277
### Test set diff
# Baseline NB test accuracy minus the KDE node-1 NB test accuracy.
# The baseline used here was previously svm_cf_ov_acc (the SVM accuracy),
# which compared an SVM against a naive Bayes model; the NB comparison uses
# nb_cf_ov_acc, consistent with the 3-fold diff (ad_nb_fit_re). Printed
# results that predate this fix, including the Bayesian tests that consume
# this value, must be regenerated by re-knitting.
diff_tda_kde_5.50.5_nb.n1_test <-
  nb_cf_ov_acc - ad_tda_kde_5.50.5_n1_nb_cf0_ov_acc
diff_tda_kde_5.50.5_nb.n1_test
##   Accuracy 
## 0.05200655
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n1_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n1_test_odds.left<-bst_tda_kde_5.50.5_nb.n1_test$probLeft/bst_tda_kde_5.50.5_nb.n1_test$probRight
bst_tda_kde_5.50.5_nb.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n1_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n1_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1570667
## 
## $winRight
## [1] 0.8429333
# Bayesian Correlated Test

bct_tda_kde_5.50.5_nb.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nb.n1_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n1_test))
#bf_tda_kde_5.50.5_nb.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n1_test))

##Node2

# Fit a naive Bayes classifier (method 'nb') on the KDE-filter node-2 data,
# resampled with the shared `fitControl` and selected on accuracy.
# This call previously trained on tda.m_adult_5.50.5.n2.vec, the object name
# used in the PCA-filter section, although the model belongs to the KDE
# section (node 1 uses tda.m_kde_adult_5.50.5.n1.vec).
# NOTE(review): confirm tda.m_kde_adult_5.50.5.n2.vec is created upstream,
# then re-knit; all downstream node-2 KDE results depend on this fit.
Adult_TDA_KDE_5.50.5_n2_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  method = 'nb',
  trControl = fitControl,
  metric = 'Accuracy'
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.France, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.50.5_n2_NbFit0
## Naive Bayes 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8138, 8138, 8136 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy  Kappa    
##   FALSE           NaN        NaN
##    TRUE      0.642652  0.2948972
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_KDE_5.50.5_n2_NbFit0$resample
##    Accuracy     Kappa Resample
## 1 0.6701082 0.3065371    Fold1
## 2 0.7202557 0.4390306    Fold2
## 3 0.5375921 0.1391239    Fold3
ad_tda_kde_5.50.5_n2_nb_fit_re<-Adult_TDA_KDE_5.50.5_n2_NbFit0$resample[1]

summary(Adult_TDA_KDE_5.50.5_n2_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_KDE_5.50.5_n2_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n2_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n2_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n2_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3598  1573
##      >50K    3818   779
##                                          
##                Accuracy : 0.4481         
##                  95% CI : (0.4382, 0.458)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : -0.1385        
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.4852         
##             Specificity : 0.3312         
##          Pos Pred Value : 0.6958         
##          Neg Pred Value : 0.1695         
##              Prevalence : 0.7592         
##          Detection Rate : 0.3683         
##    Detection Prevalence : 0.5294         
##       Balanced Accuracy : 0.4082         
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_kde_5.50.5_n2_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3598  1573
##      >50K    3818   779
##                                          
##                Accuracy : 0.4481         
##                  95% CI : (0.4382, 0.458)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : -0.1385        
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.4852         
##             Specificity : 0.3312         
##          Pos Pred Value : 0.6958         
##          Neg Pred Value : 0.1695         
##              Prevalence : 0.7592         
##          Detection Rate : 0.3683         
##    Detection Prevalence : 0.5294         
##       Balanced Accuracy : 0.4082         
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_kde_5.50.5_n2_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   4.480958e-01  -1.384920e-01   4.381994e-01   4.580232e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  3.863576e-205
ad_tda_kde_5.50.5_n2_nb_cf0_ov_acc<-ad_tda_kde_5.50.5_n2_nb_cf0$overall[1]
ad_tda_kde_5.50.5_n2_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.4851672            0.3312075            0.6958035 
##       Neg Pred Value            Precision               Recall 
##            0.1694583            0.6958035            0.4851672 
##                   F1           Prevalence       Detection Rate 
##            0.5717010            0.7592138            0.3683456 
## Detection Prevalence    Balanced Accuracy 
##            0.5293817            0.4081873
ad_tda_kde_5.50.5_n2_nb_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n2_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted naive Bayes (NB) vs. tda-assisted NB classifiers

### 3-fold diff

# Per-fold accuracy of the baseline NB fit minus the KDE node-2 NB fit;
# positive values mean the baseline scored higher in cross-validation.
diff_tda_kde_5.50.5_nb_n2_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n2_nb_fit_re)
diff_tda_kde_5.50.5_nb_n2_3_fold
##     Accuracy
## 1 0.09022350
## 2 0.03991283
## 3 0.22379244
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n2_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n2_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n2_3_fold$probRight
bst_tda_kde_5.50.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009333333
## 
## $winRight
## [1] 0.9906667
# Bayesian Correlated Test

bct_tda_kde_5.50.5_nb.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n2_3_fold
## $left
## [1] 0.09040089
## 
## $rope
## [1] 0.02481347
## 
## $right
## [1] 0.8847856
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_nb_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold))
#bf_tda_kde_5.50.5_nb.n2_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold)
## t = 2.1503, df = 2, p-value = 0.1645
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1180899  0.3540424
## sample estimates:
## mean of x 
## 0.1179763
### Test set diff
# Baseline NB test accuracy minus the KDE node-2 NB test accuracy.
# The baseline used here was previously svm_cf_ov_acc (the SVM accuracy),
# which compared an SVM against a naive Bayes model; the NB comparison uses
# nb_cf_ov_acc, consistent with the 3-fold diff (ad_nb_fit_re). Printed
# results that predate this fix, including the Bayesian tests that consume
# this value, must be regenerated by re-knitting.
diff_tda_kde_5.50.5_nb.n2_test <-
  nb_cf_ov_acc - ad_tda_kde_5.50.5_n2_nb_cf0_ov_acc
diff_tda_kde_5.50.5_nb.n2_test
##  Accuracy 
## 0.3797093
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n2_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n2_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n2_test_odds.left<-bst_tda_kde_5.50.5_nb.n2_test$probLeft/bst_tda_kde_5.50.5_nb.n2_test$probRight
bst_tda_kde_5.50.5_nb.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n2_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n2_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1608
## 
## $winRight
## [1] 0.8392
# Bayesian Correlated Test

bct_tda_kde_5.50.5_nb.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nb.n2_test)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n2_test)) #bf_tda_kde_5.50.5_nb.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n2_test))

##Node3

# Fit the node-3 TDA/KDE-assisted Naive Bayes classifier with caret, using
# the resampling scheme in fitControl and selecting on accuracy.
#
# The tuning grid is pinned to the kernel-density candidate. Left to its
# default, caret also tries usekernel = FALSE, and that fit aborts on every
# fold with "Zero variances for at least one class" (the listed variables
# look like rare one-hot dummy columns), so it only ever contributes a NaN
# row plus "model fit failed" / "missing values" warnings. fL = 0 and
# adjust = 1 are the values the default grid already holds constant, so the
# selected model (usekernel = TRUE) is the same one as before.
Adult_TDA_KDE_5.50.5_n3_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n3.vec,
                method = 'nb',
                trControl = fitControl,
                tuneGrid = expand.grid(usekernel = TRUE, fL = 0, adjust = 1),
                metric = 'Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Columbia, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Outlying.US.Guam.USVI.etc., V14.Portugal, V14.Thailand
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.50.5_n3_NbFit0
## Naive Bayes 
## 
## 13240 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8827, 8826, 8827 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa      
##   FALSE            NaN          NaN
##    TRUE      0.7725076  0.007081792
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_KDE_5.50.5_n3_NbFit0$resample
##    Accuracy      Kappa Resample
## 1 0.7715840 0.00000000    Fold1
## 2 0.7714092 0.00000000    Fold2
## 3 0.7745298 0.02124538    Fold3
# Keep the per-fold accuracies as a one-column data frame (Accuracy is the
# first column of $resample) for the fold-wise comparison further down.
ad_tda_kde_5.50.5_n3_nb_fit_re <- Adult_TDA_KDE_5.50.5_n3_NbFit0$resample["Accuracy"]

# Component-level summary of the fitted model.
summary(Adult_TDA_KDE_5.50.5_n3_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the held-out test set with the node-3 Naive Bayes model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n3_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predicted labels against the true test labels.
ad_tda_kde_5.50.5_n3_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n3_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2339
##      >50K       0    13
##                                         
##                Accuracy : 0.7605        
##                  95% CI : (0.752, 0.769)
##     No Information Rate : 0.7592        
##     P-Value [Acc > NIR] : 0.3844        
##                                         
##                   Kappa : 0.0084        
##                                         
##  Mcnemar's Test P-Value : <2e-16        
##                                         
##             Sensitivity : 1.000000      
##             Specificity : 0.005527      
##          Pos Pred Value : 0.760226      
##          Neg Pred Value : 1.000000      
##              Prevalence : 0.759214      
##          Detection Rate : 0.759214      
##    Detection Prevalence : 0.998669      
##       Balanced Accuracy : 0.502764      
##                                         
##        'Positive' Class :  <=50K        
## 
ad_tda_kde_5.50.5_n3_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2339
##      >50K       0    13
##                                         
##                Accuracy : 0.7605        
##                  95% CI : (0.752, 0.769)
##     No Information Rate : 0.7592        
##     P-Value [Acc > NIR] : 0.3844        
##                                         
##                   Kappa : 0.0084        
##                                         
##  Mcnemar's Test P-Value : <2e-16        
##                                         
##             Sensitivity : 1.000000      
##             Specificity : 0.005527      
##          Pos Pred Value : 0.760226      
##          Neg Pred Value : 1.000000      
##              Prevalence : 0.759214      
##          Detection Rate : 0.759214      
##    Detection Prevalence : 0.998669      
##       Balanced Accuracy : 0.502764      
##                                         
##        'Positive' Class :  <=50K        
## 
ad_tda_kde_5.50.5_n3_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.760544636    0.008368689    0.751953582    0.768980076    0.759213759 
## AccuracyPValue  McnemarPValue 
##    0.384402497    0.000000000
# Overall test accuracy, kept as a named length-one vector
# (Accuracy is the first entry of $overall).
ad_tda_kde_5.50.5_n3_nb_cf0_ov_acc <- ad_tda_kde_5.50.5_n3_nb_cf0$overall["Accuracy"]
# Show the per-class statistics.
ad_tda_kde_5.50.5_n3_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##          1.000000000          0.005527211          0.760225525 
##       Neg Pred Value            Precision               Recall 
##          1.000000000          0.760225525          1.000000000 
##                   F1           Prevalence       Detection Rate 
##          0.863781958          0.759213759          0.759213759 
## Detection Prevalence    Balanced Accuracy 
##          0.998669124          0.502763605
# Keep precision, recall and F1 (entries 5 to 7 of $byClass), selected by name.
ad_tda_kde_5.50.5_n3_nb_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n3_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

# Per-fold accuracy gap: ad_nb_fit_re minus the node-3 TDA/KDE NB resamples.
diff_tda_kde_5.50.5_nb_n3_3_fold <- ad_nb_fit_re - ad_tda_kde_5.50.5_n3_nb_fit_re
diff_tda_kde_5.50.5_nb_n3_3_fold
##      Accuracy
## 1 -0.01125229
## 2 -0.01124067
## 3 -0.01314522
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n3_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n3_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n3_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n3_3_fold$probRight
bst_tda_kde_5.50.5_nb.n3_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n3_3_fold
## $winLeft
## [1] 0.8339333
## 
## $winRope
## [1] 0.1660667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Runs correlatedBayesianTtest on the three per-fold accuracy differences;
# the arguments after the difference matrix are 0.1, then -0.01 and 0.01
# (the same two bounds passed to the sign and signed-rank tests above).
# NOTE(review): 0.1 is presumably the fold-correlation rho. The usual
# heuristic for k-fold cross-validation is rho = 1/k, i.e. 1/3 for the
# 3-fold resampling used here rather than 0.1 (the 10-fold value) - confirm.

bct_tda_kde_5.50.5_nb.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n3_3_fold
## $left
## [1] 0.9381183
## 
## $rope
## [1] 0.06132473
## 
## $right
## [1] 0.000556949
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_nb_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold))
#bf_tda_kde_5.50.5_nb.n3_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold)
## t = -18.769, df = 2, p-value = 0.002827
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.01460265 -0.00915614
## sample estimates:
##   mean of x 
## -0.01187939
### Test set diff
# Difference in overall test-set accuracy: svm_cf_ov_acc minus the node-3
# TDA/KDE-assisted NB accuracy.
# NOTE(review): the fold-wise comparison above uses ad_nb_fit_re as the
# baseline, whereas this one uses svm_cf_ov_acc (presumably the SVM test
# accuracy) - confirm the SVM baseline is intended for an NB comparison.
diff_tda_kde_5.50.5_nb.n3_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n3_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n3_test
##   Accuracy 
## 0.06726044
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n3_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n3_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n3_test_odds.left<-bst_tda_kde_5.50.5_nb.n3_test$probLeft/bst_tda_kde_5.50.5_nb.n3_test$probRight
bst_tda_kde_5.50.5_nb.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n3_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n3_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1606333
## 
## $winRight
## [1] 0.8393667
# Bayesian Correlated Test
# The input is the single test-set accuracy difference, and the call returns
# NA for left, rope and right (see the recorded output below).
# NOTE(review): presumably the test needs the spread across several paired
# differences, which one value cannot provide - consider dropping this call
# or feeding it per-fold differences; confirm against the function definition.

bct_tda_kde_5.50.5_nb.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n3_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nb.n3_test)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n3_test)) #bf_tda_kde_5.50.5_nb.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n3_test))


##Node4

# Fit the node-4 TDA/KDE-assisted Naive Bayes classifier with caret, using
# the resampling scheme in fitControl and selecting on accuracy.
#
# The tuning grid is pinned to the kernel-density candidate. Left to its
# default, caret also tries usekernel = FALSE, and that fit aborts on every
# fold with "Zero variances for at least one class" (the listed variables
# look like rare one-hot dummy columns), so it only ever contributes a NaN
# row plus "model fit failed" / "missing values" warnings. fL = 0 and
# adjust = 1 are the values the default grid already holds constant, so the
# selected model (usekernel = TRUE) is the same one as before.
Adult_TDA_KDE_5.50.5_n4_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n4.vec,
                method = 'nb',
                trControl = fitControl,
                tuneGrid = expand.grid(usekernel = TRUE, fL = 0, adjust = 1),
                metric = 'Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Jamaica, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.France, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.India, V14.Iran, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.50.5_n4_NbFit0
## Naive Bayes 
## 
## 16700 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11133, 11134, 11133 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9449102    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_KDE_5.50.5_n4_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9448536     0    Fold1
## 2 0.9450234     0    Fold2
## 3 0.9448536     0    Fold3
# Keep the per-fold accuracies as a one-column data frame (Accuracy is the
# first column of $resample) for the fold-wise comparison further down.
ad_tda_kde_5.50.5_n4_nb_fit_re <- Adult_TDA_KDE_5.50.5_n4_NbFit0$resample["Accuracy"]

# Component-level summary of the fitted model.
summary(Adult_TDA_KDE_5.50.5_n4_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the held-out test set with the node-4 Naive Bayes model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n4_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predicted labels against the true test labels.
ad_tda_kde_5.50.5_n4_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n4_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n4_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n4_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Overall test accuracy, kept as a named length-one vector
# (Accuracy is the first entry of $overall).
ad_tda_kde_5.50.5_n4_nb_cf0_ov_acc <- ad_tda_kde_5.50.5_n4_nb_cf0$overall["Accuracy"]
# Show the per-class statistics.
ad_tda_kde_5.50.5_n4_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Keep precision, recall and F1 (entries 5 to 7 of $byClass), selected by name.
ad_tda_kde_5.50.5_n4_nb_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n4_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

# Per-fold accuracy gap: ad_nb_fit_re minus the node-4 TDA/KDE NB resamples.
diff_tda_kde_5.50.5_nb_n4_3_fold <- ad_nb_fit_re - ad_tda_kde_5.50.5_n4_nb_fit_re
diff_tda_kde_5.50.5_nb_n4_3_fold
##     Accuracy
## 1 -0.1845219
## 2 -0.1848549
## 3 -0.1834690
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n4_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n4_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n4_3_fold$probRight
bst_tda_kde_5.50.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n4_3_fold
## $winLeft
## [1] 0.9904667
## 
## $winRope
## [1] 0.009533333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Runs correlatedBayesianTtest on the three per-fold accuracy differences;
# the arguments after the difference matrix are 0.1, then -0.01 and 0.01
# (the same two bounds passed to the sign and signed-rank tests above).
# NOTE(review): 0.1 is presumably the fold-correlation rho. The usual
# heuristic for k-fold cross-validation is rho = 1/k, i.e. 1/3 for the
# 3-fold resampling used here rather than 0.1 (the 10-fold value) - confirm.

bct_tda_kde_5.50.5_nb.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n4_3_fold
## $left
## [1] 0.9999962
## 
## $rope
## [1] 7.47709e-07
## 
## $right
## [1] 3.081056e-06
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_nb_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold))
#bf_tda_kde_5.50.5_nb.n4_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold)
## t = -441.22, df = 2, p-value = 5.137e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1860790 -0.1824849
## sample estimates:
##  mean of x 
## -0.1842819
### Test set diff
# Difference in overall test-set accuracy: svm_cf_ov_acc minus the node-4
# TDA/KDE-assisted NB accuracy.
# NOTE(review): the fold-wise comparison above uses ad_nb_fit_re as the
# baseline, whereas this one uses svm_cf_ov_acc (presumably the SVM test
# accuracy) - confirm the SVM baseline is intended for an NB comparison.
diff_tda_kde_5.50.5_nb.n4_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n4_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n4_test
##   Accuracy 
## 0.06859132
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n4_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n4_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n4_test_odds.left<-bst_tda_kde_5.50.5_nb.n4_test$probLeft/bst_tda_kde_5.50.5_nb.n4_test$probRight
bst_tda_kde_5.50.5_nb.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n4_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n4_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1557333
## 
## $winRight
## [1] 0.8442667
# Bayesian Correlated Test
# The input is the single test-set accuracy difference, and the call returns
# NA for left, rope and right (see the recorded output below).
# NOTE(review): presumably the test needs the spread across several paired
# differences, which one value cannot provide - consider dropping this call
# or feeding it per-fold differences; confirm against the function definition.

bct_tda_kde_5.50.5_nb.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n4_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nb.n4_test)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n4_test)) #bf_tda_kde_5.50.5_nb.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n4_test))

##Node5

# Fit the node-5 TDA/KDE-assisted Naive Bayes classifier with caret, using
# the resampling scheme in fitControl and selecting on accuracy.
#
# The tuning grid is pinned to the kernel-density candidate. Left to its
# default, caret also tries usekernel = FALSE, and that fit aborts on every
# fold with "Zero variances for at least one class" (a long list of
# predictors for this node), so it only ever contributes a NaN row plus
# "model fit failed" / "missing values" warnings. fL = 0 and adjust = 1 are
# the values the default grid already holds constant, so the selected model
# (usekernel = TRUE) is the same one as before.
Adult_TDA_KDE_5.50.5_n5_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n5.vec,
                method = 'nb',
                trControl = fitControl,
                tuneGrid = expand.grid(usekernel = TRUE, fL = 0, adjust = 1),
                metric = 'Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Local.gov, V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Separated, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Prof.specialty, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.50.5_n5_NbFit0
## Naive Bayes 
## 
## 14404 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9603, 9602, 9603 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9979867    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_KDE_5.50.5_n5_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9981254     0    Fold1
## 2 0.9979175     0    Fold2
## 3 0.9979171     0    Fold3
# Keep the per-fold accuracies as a one-column data frame (Accuracy is the
# first column of $resample) for the fold-wise comparison further down.
ad_tda_kde_5.50.5_n5_nb_fit_re <- Adult_TDA_KDE_5.50.5_n5_NbFit0$resample["Accuracy"]

# Component-level summary of the fitted model.
summary(Adult_TDA_KDE_5.50.5_n5_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Score the held-out test set with the node-5 Naive Bayes model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n5_NbFit0,
  newdata = adult.one_hot_df4Test
)

# Cross-tabulate the predicted labels against the true test labels.
ad_tda_kde_5.50.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n5_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n5_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n5_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Overall test accuracy, kept as a named length-one vector
# (Accuracy is the first entry of $overall).
ad_tda_kde_5.50.5_n5_nb_cf0_ov_acc <- ad_tda_kde_5.50.5_n5_nb_cf0$overall["Accuracy"]
# Show the per-class statistics.
ad_tda_kde_5.50.5_n5_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Keep precision, recall and F1 (entries 5 to 7 of $byClass), selected by name.
ad_tda_kde_5.50.5_n5_nb_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n5_nb_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

# Per-fold accuracy gap: ad_nb_fit_re minus the node-5 TDA/KDE NB resamples.
diff_tda_kde_5.50.5_nb_n5_3_fold <- ad_nb_fit_re - ad_tda_kde_5.50.5_n5_nb_fit_re
diff_tda_kde_5.50.5_nb_n5_3_fold
##     Accuracy
## 1 -0.2377937
## 2 -0.2377490
## 3 -0.2365325
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n5_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n5_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n5_3_fold$probRight
bst_tda_kde_5.50.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n5_3_fold
## $winLeft
## [1] 0.9914
## 
## $winRope
## [1] 0.0086
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Runs correlatedBayesianTtest on the three per-fold accuracy differences;
# the arguments after the difference matrix are 0.1, then -0.01 and 0.01
# (the same two bounds passed to the sign and signed-rank tests above).
# NOTE(review): 0.1 is presumably the fold-correlation rho. The usual
# heuristic for k-fold cross-validation is rho = 1/k, i.e. 1/3 for the
# 3-fold resampling used here rather than 0.1 (the 10-fold value) - confirm.

bct_tda_kde_5.50.5_nb.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n5_3_fold
## $left
## [1] 0.9999978
## 
## $rope
## [1] 3.415999e-07
## 
## $right
## [1] 1.859851e-06
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_nb_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold))
#bf_tda_kde_5.50.5_nb.n5_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold)
## t = -574.5, df = 2, p-value = 3.03e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2391361 -0.2355808
## sample estimates:
##  mean of x 
## -0.2373584
### Test set diff
# Difference in overall test-set accuracy: svm_cf_ov_acc minus the node-5
# TDA/KDE-assisted NB accuracy.
# NOTE(review): the fold-wise comparison above uses ad_nb_fit_re as the
# baseline, whereas this one uses svm_cf_ov_acc (presumably the SVM test
# accuracy) - confirm the SVM baseline is intended for an NB comparison.
diff_tda_kde_5.50.5_nb.n5_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n5_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n5_test
##   Accuracy 
## 0.06859132
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n5_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n5_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n5_test_odds.left<-bst_tda_kde_5.50.5_nb.n5_test$probLeft/bst_tda_kde_5.50.5_nb.n5_test$probRight
bst_tda_kde_5.50.5_nb.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n5_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n5_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1628
## 
## $winRight
## [1] 0.8372
# Bayesian Correlated Test
# The input is the single test-set accuracy difference, and the call returns
# NA for left, rope and right (see the recorded output below).
# NOTE(review): presumably the test needs the spread across several paired
# differences, which one value cannot provide - consider dropping this call
# or feeding it per-fold differences; confirm against the function definition.

bct_tda_kde_5.50.5_nb.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n5_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nb.n5_test)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n5_test)) #bf_tda_kde_5.50.5_nb.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n5_test))